Example 1
void
linux32_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
{
	struct lwp *l = curlwp;
	struct proc *p = l->l_proc;
	struct trapframe *tf;
	struct linux32_rt_sigframe *fp, frame;
	int onstack, error;
	linux32_siginfo_t *lsi;
	int sig = ksi->ksi_signo;
	sig_t catcher = SIGACTION(p, sig).sa_handler;
	struct sigaltstack *sas = &l->l_sigstk;

	tf = l->l_md.md_regs;
	/* Do we need to jump onto the signal stack? */
	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;


	/* Allocate space for the signal handler context. */
	if (onstack)
		fp = (struct linux32_rt_sigframe *)((char *)sas->ss_sp +
		    sas->ss_size);
	else
		fp = (struct linux32_rt_sigframe *)tf->tf_rsp;
	fp--;

	/* Build stack frame for signal trampoline. */
	NETBSD32PTR32(frame.sf_handler, catcher);
	frame.sf_sig = native_to_linux32_signo[sig];
	NETBSD32PTR32(frame.sf_sip, &fp->sf_si);
	NETBSD32PTR32(frame.sf_ucp, &fp->sf_uc);

	DPRINTF(("rt: onstack = %d, fp = %p sig = %d rip = 0x%lx\n",
	    onstack, fp, sig, tf->tf_rip));

	lsi = &frame.sf_si;
	(void)memset(lsi, 0, sizeof(frame.sf_si));
	lsi->lsi_errno = native_to_linux32_errno[ksi->ksi_errno];
	lsi->lsi_code = native_to_linux_si_code(ksi->ksi_code);
	lsi->lsi_signo = frame.sf_sig;
	switch (lsi->lsi_signo) {
	case LINUX32_SIGILL:
	case LINUX32_SIGFPE:
	case LINUX32_SIGSEGV:
	case LINUX32_SIGBUS:
	case LINUX32_SIGTRAP:
		NETBSD32PTR32(lsi->lsi_addr, ksi->ksi_addr);
		break;
	case LINUX32_SIGCHLD:
		lsi->lsi_uid = ksi->ksi_uid;
		lsi->lsi_pid = ksi->ksi_pid;
		lsi->lsi_utime = ksi->ksi_utime;
		lsi->lsi_stime = ksi->ksi_stime;
		lsi->lsi_status = native_to_linux_si_status(ksi->ksi_code,
		    ksi->ksi_status);
		break;
	case LINUX32_SIGIO:
		lsi->lsi_band = ksi->ksi_band;
		lsi->lsi_fd = ksi->ksi_fd;
		break;
	default:
		lsi->lsi_uid = ksi->ksi_uid;
		lsi->lsi_pid = ksi->ksi_pid;
		if (lsi->lsi_signo == LINUX32_SIGALRM ||
		    lsi->lsi_signo >= LINUX32_SIGRTMIN)
			NETBSD32PTR32(lsi->lsi_value.sival_ptr,
			     ksi->ksi_value.sival_ptr);
		break;
	}

	/* Save register context. */
	linux32_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
	sendsig_reset(l, sig);
	mutex_exit(p->p_lock);
	error = copyout(&frame, fp, sizeof(frame));
	mutex_enter(p->p_lock);

	if (error != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		sigexit(l, SIGILL);
		/* NOTREACHED */
	}

	/*
	 * Build context to run handler in.
	 */
	tf->tf_fs = GSEL(GUDATA32_SEL, SEL_UPL) & 0xffffffff;
	tf->tf_es = GSEL(GUDATA32_SEL, SEL_UPL) & 0xffffffff;
	tf->tf_ds = GSEL(GUDATA32_SEL, SEL_UPL) & 0xffffffff;
	tf->tf_rip = (((long)p->p_sigctx.ps_sigcode) +
	    (linux32_rt_sigcode - linux32_sigcode)) & 0xffffffff;
	tf->tf_cs = GSEL(GUCODE32_SEL, SEL_UPL) & 0xffffffff;
	tf->tf_rflags &= ~PSL_CLEARSIG & 0xffffffff;
	tf->tf_rsp = (long)fp & 0xffffffff;
	tf->tf_ss = GSEL(GUDATA32_SEL, SEL_UPL) & 0xffffffff;

	/* Remember that we're now on the signal stack. */
	if (onstack)
		sas->ss_flags |= SS_ONSTACK;

	return;
}
Example 2
/*
 * Main routine for the callback notification thread
 */
static void
i_mac_notify_thread(void *arg)
{
	mac_impl_t	*mip = arg;
	callb_cpr_t	cprinfo;
	mac_cb_t	*mcb;
	mac_cb_info_t	*mcbi;
	mac_notify_cb_t	*mncb;

	mcbi = &mip->mi_notify_cb_info;
	CALLB_CPR_INIT(&cprinfo, mcbi->mcbi_lockp, callb_generic_cpr,
	    "i_mac_notify_thread");

	mutex_enter(mcbi->mcbi_lockp);

	for (;;) {
		uint32_t	bits;
		uint32_t	type;

		bits = mip->mi_notify_bits;
		if (bits == 0) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
			CALLB_CPR_SAFE_END(&cprinfo, mcbi->mcbi_lockp);
			continue;
		}
		mip->mi_notify_bits = 0;
		if ((bits & (1 << MAC_NNOTE)) != 0) {
			/* request to quit */
			ASSERT(mip->mi_state_flags & MIS_DISABLED);
			break;
		}

		mutex_exit(mcbi->mcbi_lockp);

		/*
		 * Log link changes on the actual link, but then do reports on
		 * synthetic state (if part of a bridge).
		 */
		if ((bits & (1 << MAC_NOTE_LOWLINK)) != 0) {
			link_state_t newstate;
			mac_handle_t mh;

			i_mac_log_link_state(mip);
			newstate = mip->mi_lowlinkstate;
			if (mip->mi_bridge_link != NULL) {
				mutex_enter(&mip->mi_bridge_lock);
				if ((mh = mip->mi_bridge_link) != NULL) {
					newstate = mac_bridge_ls_cb(mh,
					    newstate);
				}
				mutex_exit(&mip->mi_bridge_lock);
			}
			if (newstate != mip->mi_linkstate) {
				mip->mi_linkstate = newstate;
				bits |= 1 << MAC_NOTE_LINK;
			}
		}

		/*
		 * Do notification callbacks for each notification type.
		 */
		for (type = 0; type < MAC_NNOTE; type++) {
			if ((bits & (1 << type)) == 0) {
				continue;
			}

			if (mac_notify_cb_list[type] != NULL)
				(*mac_notify_cb_list[type])(mip);

			/*
			 * Walk the list of notifications.
			 */
			MAC_CALLBACK_WALKER_INC(&mip->mi_notify_cb_info);
			for (mcb = mip->mi_notify_cb_list; mcb != NULL;
			    mcb = mcb->mcb_nextp) {
				mncb = (mac_notify_cb_t *)mcb->mcb_objp;
				mncb->mncb_fn(mncb->mncb_arg, type);
			}
			MAC_CALLBACK_WALKER_DCR(&mip->mi_notify_cb_info,
			    &mip->mi_notify_cb_list);
		}

		mutex_enter(mcbi->mcbi_lockp);
	}

	mip->mi_state_flags |= MIS_NOTIFY_DONE;
	cv_broadcast(&mcbi->mcbi_cv);

	/* CALLB_CPR_EXIT drops the lock */
	CALLB_CPR_EXIT(&cprinfo);
	thread_exit();
}
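A minimal producer-side sketch, assuming the same MAC-layer context as the thread above: a notifier records the pending bit under mcbi_lockp and wakes the worker through mcbi_cv. The helper name i_mac_notify_post() is hypothetical; only fields and primitives already used above are relied on.

/*
 * Hypothetical producer sketch (not part of the source above): record a
 * pending notification and wake i_mac_notify_thread().  The function name
 * is an assumption made for illustration.
 */
static void
i_mac_notify_post(mac_impl_t *mip, uint32_t type)
{
	mac_cb_info_t	*mcbi = &mip->mi_notify_cb_info;

	ASSERT(type < MAC_NNOTE);

	mutex_enter(mcbi->mcbi_lockp);
	mip->mi_notify_bits |= (1 << type);	/* consumed by the thread */
	cv_broadcast(&mcbi->mcbi_cv);		/* wakes the cv_wait() above */
	mutex_exit(mcbi->mcbi_lockp);
}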
Example 3
int
signotify(int cmd, siginfo_t *siginfo, signotify_id_t *sn_id)
{
	k_siginfo_t	info;
	signotify_id_t	id;
	proc_t		*p;
	proc_t		*cp = curproc;
	signotifyq_t	*snqp;
	struct cred	*cr;
	sigqueue_t	*sqp;
	sigqhdr_t	*sqh;
	u_longlong_t	sid;
	model_t 	datamodel = get_udatamodel();

	if (copyin(sn_id, &id, sizeof (signotify_id_t)))
		return (set_errno(EFAULT));

	if (id.sn_index >= _SIGNOTIFY_MAX || id.sn_index < 0)
		return (set_errno(EINVAL));

	switch (cmd) {
	case SN_PROC:
		/* get snid for the given user address of signotify_id_t */
		sid = get_sigid(cp, (caddr_t)sn_id);

		if (id.sn_pid > 0) {
			mutex_enter(&pidlock);
			if ((p = prfind(id.sn_pid)) != NULL) {
				mutex_enter(&p->p_lock);
				if (p->p_signhdr != NULL) {
					snqp = SIGN_PTR(p, id.sn_index);
					if (snqp->sn_snid == sid) {
						mutex_exit(&p->p_lock);
						mutex_exit(&pidlock);
						return (set_errno(EBUSY));
					}
				}
				mutex_exit(&p->p_lock);
			}
			mutex_exit(&pidlock);
		}

		if (copyin_siginfo(datamodel, siginfo, &info))
			return (set_errno(EFAULT));

		/* The si_code value must indicate the signal will be queued */
		if (!sigwillqueue(info.si_signo, info.si_code))
			return (set_errno(EINVAL));

		if (cp->p_signhdr == NULL) {
			/* Allocate signotify pool first time */
			sqh = sigqhdralloc(sizeof (signotifyq_t),
			    _SIGNOTIFY_MAX);
			mutex_enter(&cp->p_lock);
			if (cp->p_signhdr == NULL) {
				/* hang the pool head on proc */
				cp->p_signhdr = sqh;
			} else {
				/* another lwp allocated the pool, free ours */
				sigqhdrfree(sqh);
			}
		} else {
			mutex_enter(&cp->p_lock);
		}

		sqp = sigqalloc(cp->p_signhdr);
		if (sqp == NULL) {
			mutex_exit(&cp->p_lock);
			return (set_errno(EAGAIN));
		}
		cr = CRED();
		sqp->sq_info = info;
		sqp->sq_info.si_pid = cp->p_pid;
		sqp->sq_info.si_ctid = PRCTID(cp);
		sqp->sq_info.si_zoneid = getzoneid();
		sqp->sq_info.si_uid = crgetruid(cr);

		/* fill the signotifyq_t fields */
		((signotifyq_t *)sqp)->sn_snid = sid;

		mutex_exit(&cp->p_lock);

		/* complete the signotify_id_t fields */
		id.sn_index = (signotifyq_t *)sqp - SIGN_PTR(cp, 0);
		id.sn_pid = cp->p_pid;

		break;

	case SN_CANCEL:
	case SN_SEND:

		sid = get_sigid(cp, (caddr_t)sn_id);
		mutex_enter(&pidlock);
		if ((id.sn_pid <= 0) || ((p = prfind(id.sn_pid)) == NULL)) {
			mutex_exit(&pidlock);
			return (set_errno(EINVAL));
		}
		mutex_enter(&p->p_lock);
		mutex_exit(&pidlock);

		if (p->p_signhdr == NULL) {
			mutex_exit(&p->p_lock);
			return (set_errno(EINVAL));
		}

		snqp = SIGN_PTR(p, id.sn_index);

		if (snqp->sn_snid == 0) {
			mutex_exit(&p->p_lock);
			return (set_errno(EINVAL));
		}

		if (snqp->sn_snid != sid) {
			mutex_exit(&p->p_lock);
			return (set_errno(EINVAL));
		}

		snqp->sn_snid = 0;

		/* cmd == SN_CANCEL or signo == 0 (SIGEV_NONE) */
		if (((sigqueue_t *)snqp)->sq_info.si_signo <= 0)
			cmd = SN_CANCEL;

		sigqsend(cmd, p, 0, (sigqueue_t *)snqp);
		mutex_exit(&p->p_lock);

		id.sn_pid = 0;
		id.sn_index = 0;

		break;

	default:
		return (set_errno(EINVAL));
	}

	if (copyout(&id, sn_id, sizeof (signotify_id_t)))
		return (set_errno(EFAULT));

	return (0);
}
Example 4
/*
 * smb2sr_work
 *
 * This function processes each SMB command in the current request
 * (which may be a compound request) building a reply containing
 * SMB reply messages, one-to-one with the SMB commands.  Some SMB
 * commands (change notify, blocking pipe read) may require both an
 * "interim response" and a later "async response" at completion.
 * In such cases, we'll encode the interim response in the reply
 * compound we're building, and put the (now async) command on a
 * list of commands that need further processing.  After we've
 * finished processing the commands in this compound and building
 * the compound reply, we'll send the compound reply, and finally
 * process the list of async commands.
 *
 * As we work our way through the compound request and reply,
 * we need to keep track of the bounds of the current request
 * and reply.  For the request, this uses an MBC_SHADOW_CHAIN
 * that begins at smb2_cmd_hdr.  The reply is appended to the
 * sr->reply chain starting at smb2_reply_hdr.
 *
 * This function must always free the smb request.
 */
void
smb2sr_work(struct smb_request *sr)
{
	smb_session_t		*session;
	uint32_t		msg_len;
	int			rc;
	boolean_t		disconnect = B_FALSE;

	session = sr->session;

	ASSERT(sr->tid_tree == 0);
	ASSERT(sr->uid_user == 0);
	ASSERT(sr->fid_ofile == 0);
	sr->smb_fid = (uint16_t)-1;

	/* temporary until we identify a user */
	sr->user_cr = zone_kcred();

	mutex_enter(&sr->sr_mutex);
	switch (sr->sr_state) {
	case SMB_REQ_STATE_SUBMITTED:
	case SMB_REQ_STATE_CLEANED_UP:
		sr->sr_state = SMB_REQ_STATE_ACTIVE;
		break;
	default:
		ASSERT(0);
		/* FALLTHROUGH */
	case SMB_REQ_STATE_CANCELED:
		goto complete_unlock_free;
	}
	mutex_exit(&sr->sr_mutex);

cmd_start:
	/*
	 * Reserve space for the reply header, and save the offset.
	 * The reply header will be overwritten later.
	 */
	sr->smb2_reply_hdr = sr->reply.chain_offset;
	(void) smb_mbc_encodef(&sr->reply, "#.", SMB2_HDR_SIZE);

	/*
	 * Decode the request header
	 *
	 * Most problems with decoding will result in the error
	 * STATUS_INVALID_PARAMETER.  If the decoding problem
	 * prevents continuing, we'll close the connection.
	 * [MS-SMB2] 3.3.5.2.6 Handling Incorrectly Formatted...
	 */
	sr->smb2_status = 0;
	sr->smb2_cmd_hdr = sr->command.chain_offset;
	if ((rc = smb2_decode_header(sr)) != 0) {
		cmn_err(CE_WARN, "clnt %s bad SMB2 header",
		    session->ip_addr_str);
		disconnect = B_TRUE;
		goto cleanup;
	}

	/*
	 * Figure out the length of data following the SMB2 header.
	 * It ends at either the next SMB2 header if there is one
	 * (smb2_next_command != 0) or at the end of the message.
	 */
	if (sr->smb2_next_command != 0) {
		/* [MS-SMB2] says this is 8-byte aligned */
		msg_len = sr->smb2_next_command;
		if ((msg_len & 7) != 0 || (msg_len < SMB2_HDR_SIZE) ||
		    ((sr->smb2_cmd_hdr + msg_len) > sr->command.max_bytes)) {
			cmn_err(CE_WARN, "clnt %s bad SMB2 next cmd",
			    session->ip_addr_str);
			disconnect = B_TRUE;
			goto cleanup;
		}
	} else {
		msg_len = sr->command.max_bytes - sr->smb2_cmd_hdr;
	}

	/*
	 * Setup a shadow chain for this SMB2 command, starting
	 * with the header and ending at either the next command
	 * or the end of the message.  Note that we've already
	 * decoded the header, so chain_offset is now positioned
	 * at the end of the header.  The signing check needs the
	 * entire SMB2 command, so we'll shadow starting at the
	 * smb2_cmd_hdr offset.  After the signing check, we'll
	 * move chain_offset up to the end of the header.
	 */
	(void) MBC_SHADOW_CHAIN(&sr->smb_data, &sr->command,
	    sr->smb2_cmd_hdr, msg_len);

	/*
	 * Verify SMB signature if signing is enabled and active now.
	 * [MS-SMB2] 3.3.5.2.4 Verifying the Signature
	 */
	if ((sr->smb2_hdr_flags & SMB2_FLAGS_SIGNED) != 0) {
		rc = smb2_sign_check_request(sr);
		if (rc != 0) {
			DTRACE_PROBE1(smb2__sign__check, smb_request_t, sr);
			if (session->signing.flags & SMB_SIGNING_CHECK) {
				smb2sr_put_error(sr, NT_STATUS_ACCESS_DENIED);
				goto cmd_finish;
			}
		}
	}

	/*
	 * Now that the signing check is done with smb_data,
	 * advance past the SMB2 header we decoded above.
	 * This leaves sr->smb_data correctly positioned
	 * for command-specific decoding in the dispatch
	 * function called next.
	 */
	sr->smb_data.chain_offset = sr->smb2_cmd_hdr + SMB2_HDR_SIZE;

	/*
	 * Default credit response.  Command handler may modify.
	 */
	sr->smb2_credit_response = sr->smb2_credit_request;

	/*
	 * Common dispatch (for sync & async)
	 */
	rc = smb2sr_dispatch(sr, NULL);
	switch (rc) {
	case SDRC_SUCCESS:
		break;
	default:
		/*
		 * SMB2 does not use the other dispatch return codes.
		 * If we see something else, log an event so we'll
		 * know something is returning bogus status codes.
		 * If you see these in the log, use dtrace to find
		 * the code returning something else.
		 */
#ifdef	DEBUG
		cmn_err(CE_NOTE, "smb2sr_dispatch -> 0x%x", rc);
#endif
		/* FALLTHROUGH */
	case SDRC_ERROR:
		if (sr->smb2_status == 0)
			sr->smb2_status = NT_STATUS_INTERNAL_ERROR;
		break;
	case SDRC_DROP_VC:
		disconnect = B_TRUE;
		goto cleanup;
	}

	/*
	 * If there's a next command, figure out where it starts,
	 * and fill in the next command offset for the reply.
	 * Note: We sanity checked smb2_next_command above
	 * (the offset to the next command).  Similarly set
	 * smb2_next_reply as the offset to the next reply.
	 */
cmd_finish:
	if (sr->smb2_next_command != 0) {
		sr->command.chain_offset =
		    sr->smb2_cmd_hdr + sr->smb2_next_command;
		sr->smb2_next_reply =
		    sr->reply.chain_offset - sr->smb2_reply_hdr;
	} else {
		sr->smb2_next_reply = 0;
	}

	/*
	 * Overwrite the SMB2 header for the response of
	 * this command (possibly part of a compound).
	 */
	sr->smb2_hdr_flags |= SMB2_FLAGS_SERVER_TO_REDIR;
	(void) smb2_encode_header(sr, B_TRUE);

	if (sr->smb2_hdr_flags & SMB2_FLAGS_SIGNED)
		smb2_sign_reply(sr);

	if (sr->smb2_next_command != 0)
		goto cmd_start;

	/*
	 * We've done all the commands in this compound.
	 * Send it out.
	 */
	smb2_send_reply(sr);

	/*
	 * If any of the requests "went async", process those now.
	 */
	if (sr->sr_async_req != NULL) {
		smb2sr_do_async(sr);
	}

cleanup:
	if (disconnect) {
		smb_rwx_rwenter(&session->s_lock, RW_WRITER);
		switch (session->s_state) {
		case SMB_SESSION_STATE_DISCONNECTED:
		case SMB_SESSION_STATE_TERMINATED:
			break;
		default:
			smb_soshutdown(session->sock);
			session->s_state = SMB_SESSION_STATE_DISCONNECTED;
			break;
		}
		smb_rwx_rwexit(&session->s_lock);
	}


	mutex_enter(&sr->sr_mutex);
complete_unlock_free:
	sr->sr_state = SMB_REQ_STATE_COMPLETED;
	mutex_exit(&sr->sr_mutex);

	smb_request_free(sr);
}
Example 5
/*
 * Construct a znode+inode and initialize.
 *
 * This does not call dmu_set_user(); that is left to the caller
 * to do, in case you don't want to return the znode.
 */
static znode_t *
zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
    dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl,
    struct inode *dip)
{
	znode_t	*zp;
	struct inode *ip;
	uint64_t mode;
	uint64_t parent;
	sa_bulk_attr_t bulk[9];
	int count = 0;

	ASSERT(zsb != NULL);

	ip = new_inode(zsb->z_sb);
	if (ip == NULL)
		return (NULL);

	zp = ITOZ(ip);
	ASSERT(zp->z_dirlocks == NULL);
	ASSERT3P(zp->z_acl_cached, ==, NULL);
	ASSERT3P(zp->z_xattr_cached, ==, NULL);
	ASSERT3P(zp->z_xattr_parent, ==, NULL);
	zp->z_moved = 0;
	zp->z_sa_hdl = NULL;
	zp->z_unlinked = 0;
	zp->z_atime_dirty = 0;
	zp->z_mapcnt = 0;
	zp->z_id = db->db_object;
	zp->z_blksz = blksz;
	zp->z_seq = 0x7A4653;
	zp->z_sync_cnt = 0;
	zp->z_is_zvol = B_FALSE;
	zp->z_is_mapped = B_FALSE;
	zp->z_is_ctldir = B_FALSE;
	zp->z_is_stale = B_FALSE;

	zfs_znode_sa_init(zsb, zp, db, obj_type, hdl);

	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &mode, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zsb), NULL, &zp->z_gen, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, &zp->z_links, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
	    &zp->z_pflags, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL,
	    &parent, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL,
	    &zp->z_atime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, &zp->z_uid, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL, &zp->z_gid, 8);

	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0) {
		if (hdl == NULL)
			sa_handle_destroy(zp->z_sa_hdl);

		goto error;
	}

	zp->z_mode = mode;

	/*
	 * xattr znodes hold a reference on their unique parent
	 */
	if (dip && zp->z_pflags & ZFS_XATTR) {
		igrab(dip);
		zp->z_xattr_parent = ITOZ(dip);
	}

	ip->i_ino = obj;
	zfs_inode_update(zp);
	zfs_inode_set_ops(zsb, ip);

	/*
	 * The only way insert_inode_locked() can fail is if the ip->i_ino
	 * number is already hashed for this super block.  This can never
	 * happen because the inode numbers map 1:1 with the object numbers.
	 *
	 * The one exception is rolling back a mounted file system, but in
	 * this case all the active inode are unhashed during the rollback.
	 */
	VERIFY3S(insert_inode_locked(ip), ==, 0);

	mutex_enter(&zsb->z_znodes_lock);
	list_insert_tail(&zsb->z_all_znodes, zp);
	zsb->z_nr_znodes++;
	membar_producer();
	mutex_exit(&zsb->z_znodes_lock);

	unlock_new_inode(ip);
	return (zp);

error:
	unlock_new_inode(ip);
	iput(ip);
	return (NULL);
}
Example 6
/*
 * With the addition of reader-writer lock semantics to page_lock_es,
 * callers wanting an exclusive (writer) lock may prevent shared-lock
 * (reader) starvation by setting the es parameter to SE_EXCL_WANTED.
 * In this case, when an exclusive lock cannot be acquired, p_selock's
 * SE_EWANTED bit is set. Shared-lock (reader) requests are also denied
 * if the page is slated for retirement.
 *
 * The se and es parameters determine if the lock should be granted
 * based on the following decision table:
 *
 * Lock wanted   es flags     p_selock/SE_EWANTED  Action
 * ----------- -------------- -------------------  ---------
 * SE_EXCL        any [1][2]   unlocked/any        grant lock, clear SE_EWANTED
 * SE_EXCL        SE_EWANTED   any lock/any        deny, set SE_EWANTED
 * SE_EXCL        none         any lock/any        deny
 * SE_SHARED      n/a [2]        shared/0          grant
 * SE_SHARED      n/a [2]      unlocked/0          grant
 * SE_SHARED      n/a            shared/1          deny
 * SE_SHARED      n/a          unlocked/1          deny
 * SE_SHARED      n/a              excl/any        deny
 *
 * Notes:
 * [1] The code grants an exclusive lock to the caller and clears the bit
 *   SE_EWANTED whenever p_selock is unlocked, regardless of the SE_EWANTED
 *   bit's value.  This was deemed acceptable as we are not concerned about
 *   exclusive-lock starvation. If this ever becomes an issue, a priority or
 *   fifo mechanism should also be implemented. Meantime, the thread that
 *   set SE_EWANTED should be prepared to catch this condition and reset it
 *
 * [2] Retired pages may not be locked at any time, regardless of the
 *   disposition of se, unless the es parameter has the SE_RETIRED flag set.
 *
 * Notes on values of "es":
 *
 *   es & 1: page_lookup_create will attempt page relocation
 *   es & SE_EXCL_WANTED: caller wants SE_EWANTED set (eg. delete
 *       memory thread); this prevents reader-starvation of waiting
 *       writer thread(s) by giving priority to writers over readers.
 *   es & SE_RETIRED: caller wants to lock pages even if they are
 *       retired.  Default is to deny the lock if the page is retired.
 *
 * And yes, we know, the semantics of this function are too complicated.
 * It's on the list to be cleaned up.
 */
int
page_lock_es(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim, int es)
{
	int		retval;
	kmutex_t	*pse = PAGE_SE_MUTEX(pp);
	int		upgraded;
	int		reclaim_it;

	ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);

	VM_STAT_ADD(page_lock_count);

	upgraded = 0;
	reclaim_it = 0;

	mutex_enter(pse);

	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));

	if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
		mutex_exit(pse);
		VM_STAT_ADD(page_lock_retired);
		return (0);
	}

	if (se == SE_SHARED && es == 1 && pp->p_selock == 0) {
		se = SE_EXCL;
	}

	if ((reclaim == P_RECLAIM) && (PP_ISFREE(pp))) {

		reclaim_it = 1;
		if (se == SE_SHARED) {
			/*
			 * This is an interesting situation.
			 *
			 * Remember that p_free can only change if
			 * p_selock < 0.
			 * p_free does not depend on our holding `pse'.
			 * And, since we hold `pse', p_selock can not change.
			 * So, if p_free changes on us, the page is already
			 * exclusively held, and we would fail to get p_selock
			 * regardless.
			 *
			 * We want to avoid getting the share
			 * lock on a free page that needs to be reclaimed.
			 * It is possible that some other thread has the share
			 * lock and has left the free page on the cache list.
			 * pvn_vplist_dirty() does this for brief periods.
			 * If the se_share is currently SE_EXCL, we will fail
			 * to acquire p_selock anyway.  Blocking is the
			 * right thing to do.
			 * If we need to reclaim this page, we must get
			 * exclusive access to it, force the upgrade now.
			 * Again, we will fail to acquire p_selock if the
			 * page is not free and block.
			 */
			upgraded = 1;
			se = SE_EXCL;
			VM_STAT_ADD(page_lock_upgrade);
		}
	}

	if (se == SE_EXCL) {
		if (!(es & SE_EXCL_WANTED) && (pp->p_selock & SE_EWANTED)) {
			/*
			 * if the caller wants a writer lock (but did not
			 * specify exclusive access), and there is a pending
			 * writer that wants exclusive access, return failure
			 */
			retval = 0;
		} else if ((pp->p_selock & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			retval = 1;
		} else {
			/* page is locked */
			if (es & SE_EXCL_WANTED) {
				/* set the SE_EWANTED bit */
				pp->p_selock |= SE_EWANTED;
			}
			retval = 0;
		}
	} else {
		retval = 0;
		if (pp->p_selock >= 0) {
			if ((pp->p_selock & SE_EWANTED) == 0) {
				pp->p_selock += SE_READER;
				retval = 1;
			}
		}
	}

	if (retval == 0) {
		if ((pp->p_selock & ~SE_EWANTED) == SE_DELETED) {
			VM_STAT_ADD(page_lock_deleted);
			mutex_exit(pse);
			return (retval);
		}

#ifdef VM_STATS
		VM_STAT_ADD(page_lock_miss);
		if (upgraded) {
			VM_STAT_ADD(page_lock_upgrade_failed);
		}
#endif
		if (lock) {
			VM_STAT_ADD(page_lock_miss_lock);
			mutex_exit(lock);
		}

		/*
		 * Now, wait for the page to be unlocked and
		 * release the lock protecting p_cv and p_selock.
		 */
		cv_wait(&pp->p_cv, pse);
		mutex_exit(pse);

		/*
		 * The page identity may have changed while we were
		 * blocked.  If we are willing to depend on "pp"
		 * still pointing to a valid page structure (i.e.,
		 * assuming page structures are not dynamically allocated
		 * or freed), we could try to lock the page if its
		 * identity hasn't changed.
		 *
		 * This needs to be measured, since we come back from
		 * cv_wait holding pse (the expensive part of this
		 * operation) we might as well try the cheap part.
		 * Though we would also have to confirm that dropping
		 * `lock' did not cause any grief to the callers.
		 */
		if (lock) {
			mutex_enter(lock);
		}
	} else {
		/*
		 * We have the page lock.
		 * If we needed to reclaim the page, and the page
		 * needed reclaiming (ie, it was free), then we
		 * have the page exclusively locked.  We may need
		 * to downgrade the page.
		 */
		ASSERT((upgraded) ?
		    ((PP_ISFREE(pp)) && PAGE_EXCL(pp)) : 1);
		mutex_exit(pse);

		/*
		 * We now hold this page's lock, either shared or
		 * exclusive.  This will prevent its identity from changing.
		 * The page, however, may or may not be free.  If the caller
		 * requested, and it is free, go reclaim it from the
		 * free list.  If the page can't be reclaimed, return failure
		 * so that the caller can start all over again.
		 *
		 * NOTE:page_reclaim() releases the page lock (p_selock)
		 *	if it can't be reclaimed.
		 */
		if (reclaim_it) {
			if (!page_reclaim(pp, lock)) {
				VM_STAT_ADD(page_lock_bad_reclaim);
				retval = 0;
			} else {
				VM_STAT_ADD(page_lock_reclaim);
				if (upgraded) {
					page_downgrade(pp);
				}
			}
		}
	}
	return (retval);
}
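A hedged caller sketch for the shared-lock path in the decision table above. SE_SHARED, SE_RETIRED and the retired-page rule come from the comment; page_unlock() as the release primitive and P_NO_RECLAIM as the "don't reclaim" reclaim_t value are assumptions.

/*
 * Hedged caller sketch, not from the source: take a shared (reader) lock,
 * honoring note [2] above -- retired pages are denied unless SE_RETIRED
 * is passed in es.  page_unlock() and P_NO_RECLAIM are assumed names.
 */
static int
read_lock_page(page_t *pp, int allow_retired)
{
	int es = allow_retired ? SE_RETIRED : 0;

	/* may block waiting for the owner; returns 0 if the lock was denied */
	if (!page_lock_es(pp, SE_SHARED, NULL, P_NO_RECLAIM, es))
		return (0);

	/* ... read-only inspection of pp ... */

	page_unlock(pp);
	return (1);
}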
Example 7
/*
 * Find, take and return a mutex held by hat_page_demote().
 * Called by page_demote_vp_pages() before hat_page_demote() call and by
 * routines that want to block hat_page_demote() but can't do it
 * via locking all constituent pages.
 *
 * Return NULL if p_szc is 0.
 *
 * It should only be used for pages that can be demoted by hat_page_demote()
 * i.e. non swapfs file system pages.  The logic here is lifted from
 * sfmmu_mlspl_enter() except there's no need to worry about p_szc increase
 * since the page is locked and not free.
 *
 * Hash of the root page is used to find the lock.
 * To find the root in the presence of hat_page_demote() changing the location
 * of the root, this routine relies on the fact that hat_page_demote() changes
 * root last.
 *
 * If NULL is returned, pp's p_szc is guaranteed to be 0. If non-NULL is
 * returned, pp's p_szc may be any value.
 */
kmutex_t *
page_szc_lock(page_t *pp)
{
	kmutex_t	*mtx;
	page_t		*rootpp;
	uint_t		szc;
	uint_t		rszc;
	uint_t		pszc = pp->p_szc;

	ASSERT(pp != NULL);
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(!PP_ISFREE(pp));
	ASSERT(pp->p_vnode != NULL);
	ASSERT(!IS_SWAPFSVP(pp->p_vnode));
	ASSERT(!PP_ISKAS(pp));

again:
	if (pszc == 0) {
		VM_STAT_ADD(pszclck_stat[0]);
		return (NULL);
	}

	/* The lock lives in the root page */

	rootpp = PP_GROUPLEADER(pp, pszc);
	mtx = PAGE_SZC_MUTEX(rootpp);
	mutex_enter(mtx);

	/*
	 * Since p_szc can only decrease if pp == rootpp, rootpp will
	 * always be the same, i.e. we have the right root regardless
	 * of rootpp->p_szc.
	 * If the location of pp's root didn't change after we took
	 * the lock, we have the right root; return the mutex hashed off it.
	 */
	if (pp == rootpp || (rszc = rootpp->p_szc) == pszc) {
		VM_STAT_ADD(pszclck_stat[1]);
		return (mtx);
	}

	/*
	 * root location changed because page got demoted.
	 * locate the new root.
	 */
	if (rszc < pszc) {
		szc = pp->p_szc;
		ASSERT(szc < pszc);
		mutex_exit(mtx);
		pszc = szc;
		VM_STAT_ADD(pszclck_stat[2]);
		goto again;
	}

	VM_STAT_ADD(pszclck_stat[3]);
	/*
	 * current hat_page_demote not done yet.
	 * wait for it to finish.
	 */
	mutex_exit(mtx);
	rootpp = PP_GROUPLEADER(rootpp, rszc);
	mtx = PAGE_SZC_MUTEX(rootpp);
	mutex_enter(mtx);
	mutex_exit(mtx);
	ASSERT(rootpp->p_szc < rszc);
	goto again;
}
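A hedged usage sketch for the function above: take the demote lock around work that must not race hat_page_demote(), and skip it when NULL is returned (p_szc is guaranteed to be 0). The wrapper name is hypothetical; everything else appears in the code above.

/*
 * Hypothetical caller sketch: block hat_page_demote() for a locked,
 * non-free file system page while its large-page state is examined.
 */
static void
inspect_large_page(page_t *pp)
{
	kmutex_t *mtx;

	ASSERT(PAGE_LOCKED(pp));

	mtx = page_szc_lock(pp);
	if (mtx == NULL) {
		/* p_szc is guaranteed to be 0; nothing can be demoted */
		return;
	}

	/* ... p_szc cannot change under us here ... */

	mutex_exit(mtx);
}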
static void
netbsd32_sendsig_sigcontext(const ksiginfo_t *ksi, const sigset_t *mask)
{
	struct lwp *l = curlwp;
	struct proc *p = l->l_proc;
	struct trapframe *tf;
	int sig = ksi->ksi_signo;
	sig_t catcher = SIGACTION(p, sig).sa_handler;
	struct netbsd32_sigframe_sigcontext *fp, frame;
	int onstack, error;
	struct sigacts *ps = p->p_sigacts;

	tf = l->l_md.md_regs;

	/* Do we need to jump onto the signal stack? */
	onstack =
	    (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;

	/* Allocate space for the signal handler context. */
	if (onstack)
		fp = (struct netbsd32_sigframe_sigcontext *)
		    ((char *)l->l_sigstk.ss_sp + l->l_sigstk.ss_size);
	else
		fp = (struct netbsd32_sigframe_sigcontext *)tf->tf_rsp;
	fp--;

	/* Build stack frame for signal trampoline. */
	switch (ps->sa_sigdesc[sig].sd_vers) {
	case 0:
		frame.sf_ra = (uint32_t)(u_long)p->p_sigctx.ps_sigcode;
		break;
	case 1:
		frame.sf_ra = (uint32_t)(u_long)ps->sa_sigdesc[sig].sd_tramp;
		break;
	default:
		/* Don't know what trampoline version; kill it. */
		sigexit(l, SIGILL);
	}
	frame.sf_signum = sig;
	frame.sf_code = ksi->ksi_trap;
	frame.sf_scp = (uint32_t)(u_long)&fp->sf_sc;

	frame.sf_sc.sc_ds = tf->tf_ds;
	frame.sf_sc.sc_es = tf->tf_es;
	frame.sf_sc.sc_fs = tf->tf_fs;
	frame.sf_sc.sc_gs = tf->tf_gs;

	frame.sf_sc.sc_eflags = tf->tf_rflags;
	frame.sf_sc.sc_edi = tf->tf_rdi;
	frame.sf_sc.sc_esi = tf->tf_rsi;
	frame.sf_sc.sc_ebp = tf->tf_rbp;
	frame.sf_sc.sc_ebx = tf->tf_rbx;
	frame.sf_sc.sc_edx = tf->tf_rdx;
	frame.sf_sc.sc_ecx = tf->tf_rcx;
	frame.sf_sc.sc_eax = tf->tf_rax;
	frame.sf_sc.sc_eip = tf->tf_rip;
	frame.sf_sc.sc_cs = tf->tf_cs;
	frame.sf_sc.sc_esp = tf->tf_rsp;
	frame.sf_sc.sc_ss = tf->tf_ss;
	frame.sf_sc.sc_trapno = tf->tf_trapno;
	frame.sf_sc.sc_err = tf->tf_err;

	/* Save signal stack. */
	frame.sf_sc.sc_onstack = l->l_sigstk.ss_flags & SS_ONSTACK;

	/* Save signal mask. */
	frame.sf_sc.sc_mask = *mask;

	sendsig_reset(l, sig);

	mutex_exit(p->p_lock);
	error = copyout(&frame, fp, sizeof(frame));
	mutex_enter(p->p_lock);

	if (error != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		sigexit(l, SIGILL);
		/* NOTREACHED */
	}

	/*
	 * Build context to run handler in.
	 */
	tf->tf_ds = GSEL(GUDATA32_SEL, SEL_UPL);
	tf->tf_es = GSEL(GUDATA32_SEL, SEL_UPL);
	tf->tf_fs = GSEL(GUDATA32_SEL, SEL_UPL);
	tf->tf_gs = GSEL(GUDATA32_SEL, SEL_UPL);

	/* Ensure FP state is reset, if FP is used. */
	l->l_md.md_flags &= ~MDL_USEDFPU;

	tf->tf_rip = (uint64_t)catcher;
	tf->tf_cs = GSEL(GUCODE32_SEL, SEL_UPL);
	tf->tf_rflags &= ~PSL_CLEARSIG;
	tf->tf_rsp = (uint64_t)fp;
	tf->tf_ss = GSEL(GUDATA32_SEL, SEL_UPL);

	/* Remember that we're now on the signal stack. */
	if (onstack)
		l->l_sigstk.ss_flags |= SS_ONSTACK;
	if ((vaddr_t)catcher >= VM_MAXUSER_ADDRESS32) {
		/*
		 * The process has given an invalid address for the
		 * handler.  Stop it, but not before this point, so that
		 * we can return the right info to userland (or in a core dump).
		 */
		sigexit(l, SIGILL);
		/* NOTREACHED */
	}
}
static void
netbsd32_sendsig_siginfo(const ksiginfo_t *ksi, const sigset_t *mask)
{
	struct lwp *l = curlwp;
	struct proc *p = l->l_proc;
	struct sigacts *ps = p->p_sigacts;
	int onstack, error;
	int sig = ksi->ksi_signo;
	struct netbsd32_sigframe_siginfo *fp, frame;
	sig_t catcher = SIGACTION(p, sig).sa_handler;
	struct trapframe *tf = l->l_md.md_regs;

	/* Do we need to jump onto the signal stack? */
	onstack =
	    (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;

	/* Allocate space for the signal handler context. */
	if (onstack)
		fp = (struct netbsd32_sigframe_siginfo *)
		    ((char *)l->l_sigstk.ss_sp + l->l_sigstk.ss_size);
	else
		fp = (struct netbsd32_sigframe_siginfo *)tf->tf_rsp;

	fp--;

	/* Build stack frame for signal trampoline. */
	switch (ps->sa_sigdesc[sig].sd_vers) {
	case 0:		/* handled by sendsig_sigcontext */
	case 1:		/* handled by sendsig_sigcontext */
	default:	/* unknown version */
		printf("nsendsig: bad version %d\n",
		    ps->sa_sigdesc[sig].sd_vers);
		sigexit(l, SIGILL);
	case 2:
		break;
	}

	frame.sf_ra = (uint32_t)(uintptr_t)ps->sa_sigdesc[sig].sd_tramp;
	frame.sf_signum = sig;
	frame.sf_sip = (uint32_t)(uintptr_t)&fp->sf_si;
	frame.sf_ucp = (uint32_t)(uintptr_t)&fp->sf_uc;
	netbsd32_si_to_si32(&frame.sf_si, (const siginfo_t *)&ksi->ksi_info);
	frame.sf_uc.uc_flags = _UC_SIGMASK;
	frame.sf_uc.uc_sigmask = *mask;
	frame.sf_uc.uc_link = (uint32_t)(uintptr_t)l->l_ctxlink;
	frame.sf_uc.uc_flags |= (l->l_sigstk.ss_flags & SS_ONSTACK)
	    ? _UC_SETSTACK : _UC_CLRSTACK;
	memset(&frame.sf_uc.uc_stack, 0, sizeof(frame.sf_uc.uc_stack));
	sendsig_reset(l, sig);

	mutex_exit(p->p_lock);
	cpu_getmcontext32(l, &frame.sf_uc.uc_mcontext, &frame.sf_uc.uc_flags);
	error = copyout(&frame, fp, sizeof(frame));
	mutex_enter(p->p_lock);

	if (error != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		sigexit(l, SIGILL);
		/* NOTREACHED */
	}

	/*
	 * Build context to run handler in.
	 */
	tf->tf_ds = GSEL(GUDATA32_SEL, SEL_UPL);
	tf->tf_es = GSEL(GUDATA32_SEL, SEL_UPL);
	tf->tf_fs = GSEL(GUDATA32_SEL, SEL_UPL);
	tf->tf_gs = GSEL(GUDATA32_SEL, SEL_UPL);

	tf->tf_rip = (uint64_t)catcher;
	tf->tf_cs = GSEL(GUCODE32_SEL, SEL_UPL);
	tf->tf_rflags &= ~PSL_CLEARSIG;
	tf->tf_rsp = (uint64_t)fp;
	tf->tf_ss = GSEL(GUDATA32_SEL, SEL_UPL);

	/* Ensure FP state is reset, if FP is used. */
	l->l_md.md_flags &= ~MDL_USEDFPU;

	/* Remember that we're now on the signal stack. */
	if (onstack)
		l->l_sigstk.ss_flags |= SS_ONSTACK;
	if ((vaddr_t)catcher >= VM_MAXUSER_ADDRESS32) {
		/*
		 * The process has given an invalid address for the
		 * handler.  Stop it, but not before this point, so that
		 * we can return the right info to userland (or in a core dump).
		 */
		sigexit(l, SIGILL);
		/* NOTREACHED */
	}
}
Example 10
/*
 * Health monitor for a single interface.
 *
 * The secondary sends ping RPCs to the primary.
 * The primary just stores the results and updates its structures.
 */
static void
rdc_health_thread(void *arg)
{
	rdc_if_t *ip = (rdc_if_t *)arg;
	struct rdc_ping ping;
	struct rdc_ping6 ping6;
	struct timeval t;
	int down = 1;
	int ret, err;
	int sec = 0;
	char ifaddr[RDC_MAXADDR];
	char r_ifaddr[RDC_MAXADDR];
	uint16_t *sp;

	bcopy(ip->ifaddr.buf, ifaddr, ip->ifaddr.len);
	sp = (uint16_t *)ifaddr;
	*sp = htons(*sp);
	bcopy(ip->r_ifaddr.buf, r_ifaddr, ip->r_ifaddr.len);
	sp = (uint16_t *)r_ifaddr;
	*sp = htons(*sp);

	while ((ip->exiting != 1) && (net_exit != ATM_EXIT)) {
		delay(HZ);

		/* setup RPC timeout */

		t.tv_sec = rdc_rpc_tmout;
		t.tv_usec = 0;

		if (ip->issecondary && !ip->no_ping) {
			if (ip->rpc_version < RDC_VERSION7) {
				/* primary ifaddr */
				bcopy(ip->r_ifaddr.buf, ping6.p_ifaddr,
				    RDC_MAXADDR);
				/* secondary ifaddr */
				bcopy(ip->ifaddr.buf, ping6.s_ifaddr,
				    RDC_MAXADDR);
				err = rdc_clnt_call_any(ip->srv, ip,
				    RDCPROC_PING4, xdr_rdc_ping6,
				    (char *)&ping6, xdr_int, (char *)&ret, &t);
			} else {
				ping.p_ifaddr.buf = r_ifaddr;
				ping.p_ifaddr.len = ip->r_ifaddr.len;
				ping.p_ifaddr.maxlen = ip->r_ifaddr.len;
				ping.s_ifaddr.buf = ifaddr;
				ping.s_ifaddr.len = ip->ifaddr.len;
				ping.s_ifaddr.maxlen = ip->ifaddr.len;
				err = rdc_clnt_call_any(ip->srv, ip,
				    RDCPROC_PING4, xdr_rdc_ping, (char *)&ping,
				    xdr_int, (char *)&ret, &t);
			}


			if (err || ret) {
				/* RPC failed - link is down */
				if (!down && !ip->isprimary) {
					/*
					 * don't print messages if also
					 * a primary - the primary will
					 * take care of it.
					 */
					rdc_if_down(ip);
					down = 1;
				}
				rdc_dump_alloc_bufs(ip);
				ip->no_ping = 1;

				/*
				 * Start back at the max possible version
				 * since the remote server could come back
				 * on a different protocol version.
				 */
				mutex_enter(&rdc_ping_lock);
				ip->rpc_version = RDC_VERS_MAX;
				mutex_exit(&rdc_ping_lock);
			} else {
				if (down && !ip->isprimary) {
					/*
					 * was failed, but now ok
					 *
					 * don't print messages if also
					 * a primary - the primary will
					 * take care of it.
					 */
					rdc_if_up(ip);
					down = 0;
				}
			}
		}
		if (!ip->isprimary && down && ++sec == 5) {
			sec = 0;
			rdc_dump_alloc_bufs(ip);
		}

		if (ip->isprimary)
			rdc_update_health(ip);
	}

	/* signal that this thread is done */
	ip->exiting = 2;
}
int
compat_13_netbsd32_sigreturn(struct lwp *l, const struct compat_13_netbsd32_sigreturn_args *uap, register_t *retval)
{
	/* {
		syscallarg(struct netbsd32_sigcontext13 *) sigcntxp;
	} */
	struct proc *p = l->l_proc;
	struct netbsd32_sigcontext13 *scp, context;
	struct trapframe *tf;
	sigset_t mask;
	int error;

	/*
	 * The trampoline code hands us the context.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	scp = (struct netbsd32_sigcontext13 *)NETBSD32PTR64(SCARG(uap, sigcntxp));
	if (copyin((void *)scp, &context, sizeof(*scp)) != 0)
		return (EFAULT);

	/* Restore register context. */
	tf = l->l_md.md_regs;

	/*
	 * Check for security violations.
	 */
	error = check_sigcontext32(l, (const struct netbsd32_sigcontext *)&context);
	if (error != 0)
		return error;

	tf->tf_gs = context.sc_gs;
	tf->tf_fs = context.sc_fs;		
	tf->tf_es = context.sc_es;
	tf->tf_ds = context.sc_ds;
	tf->tf_rflags = context.sc_eflags;
	tf->tf_rdi = context.sc_edi;
	tf->tf_rsi = context.sc_esi;
	tf->tf_rbp = context.sc_ebp;
	tf->tf_rbx = context.sc_ebx;
	tf->tf_rdx = context.sc_edx;
	tf->tf_rcx = context.sc_ecx;
	tf->tf_rax = context.sc_eax;
	tf->tf_rip = context.sc_eip;
	tf->tf_cs = context.sc_cs;
	tf->tf_rsp = context.sc_esp;
	tf->tf_ss = context.sc_ss;

	mutex_enter(p->p_lock);
	/* Restore signal stack. */
	if (context.sc_onstack & SS_ONSTACK)
		l->l_sigstk.ss_flags |= SS_ONSTACK;
	else
		l->l_sigstk.ss_flags &= ~SS_ONSTACK;
	/* Restore signal mask. */
	native_sigset13_to_sigset((sigset13_t *)&context.sc_mask, &mask);
	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
	mutex_exit(p->p_lock);

	return (EJUSTRETURN);
}
/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 */
int
compat_13_sys_sigreturn(struct lwp *l, const struct compat_13_sys_sigreturn_args *uap, register_t *retval)
{
	/* {
		syscallarg(struct sigcontext13 *) sigcntxp;
	} */
	struct proc *p = l->l_proc;
	struct sigcontext13 *scp;
	struct frame *frame;
	struct sigcontext13 tsigc;
	sigset_t mask;

	/*
	 * The trampoline code hands us the context.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	scp = SCARG(uap, sigcntxp);
	if ((int)scp & 1)
		return EINVAL;

	if (copyin(scp, &tsigc, sizeof(tsigc)) != 0)
		return EFAULT;
	scp = &tsigc;

	/* Make sure the user isn't pulling a fast one on us! */
	if ((scp->sc_ps & (PSL_MBZ|PSL_IPL|PSL_S)) != 0)
		return EINVAL;

	/* Restore register context. */
	frame = (struct frame *)l->l_md.md_regs;

	/*
	 * We only support restoring the sigcontext13 in this call.
	 * We are not called from the sigcode (per sendsig()), so
	 * we will not have a sigstate to restore.
	 */
	if (scp->sc_ap != 0)
		return EINVAL;

	/*
	 * Restore the user-supplied information.
	 * This should be done last so that the error (EINVAL)
	 * is reported to the sigreturn caller, not to the
	 * jump destination.
	 */

	frame->f_regs[SP] = scp->sc_sp;
	frame->f_regs[A6] = scp->sc_fp;
	frame->f_pc = scp->sc_pc;
	frame->f_sr = scp->sc_ps;

	mutex_enter(p->p_lock);

	/* Restore signal stack. */
	if (scp->sc_onstack & SS_ONSTACK)
		l->l_sigstk.ss_flags |= SS_ONSTACK;
	else
		l->l_sigstk.ss_flags &= ~SS_ONSTACK;

	/* Restore signal mask. */
	native_sigset13_to_sigset(&scp->sc_mask, &mask);
	(void)sigprocmask1(l, SIG_SETMASK, &mask, 0);

	mutex_exit(p->p_lock);

	return EJUSTRETURN;
}
Example 13
void
linux32_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
{
	struct lwp *l = curlwp;
	struct proc *p = l->l_proc;
	struct trapframe *tf;
	struct linux32_sigframe *fp, frame;
	int onstack, error;
	int sig = ksi->ksi_signo;
	sig_t catcher = SIGACTION(p, sig).sa_handler;
	struct sigaltstack *sas = &l->l_sigstk;

	tf = l->l_md.md_regs;
	/* Do we need to jump onto the signal stack? */
	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;


	/* Allocate space for the signal handler context. */
	if (onstack)
		fp = (struct linux32_sigframe *)((char *)sas->ss_sp +
		    sas->ss_size);
	else
		fp = (struct linux32_sigframe *)tf->tf_rsp;
	fp--;

	DPRINTF(("old: onstack = %d, fp = %p sig = %d rip = 0x%lx\n",
	    onstack, fp, sig, tf->tf_rip));

	/* Build stack frame for signal trampoline. */
	NETBSD32PTR32(frame.sf_handler, catcher);
	frame.sf_sig = native_to_linux32_signo[sig];

	linux32_save_sigcontext(l, tf, mask, &frame.sf_sc);

	sendsig_reset(l, sig);
	mutex_exit(p->p_lock);
	error = copyout(&frame, fp, sizeof(frame));
	mutex_enter(p->p_lock);

	if (error != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		sigexit(l, SIGILL);
		/* NOTREACHED */
	}

	/*
	 * Build context to run handler in.
	 */
	tf->tf_fs = GSEL(GUDATA32_SEL, SEL_UPL) & 0xffffffff;
	tf->tf_es = GSEL(GUDATA32_SEL, SEL_UPL) & 0xffffffff;
	tf->tf_ds = GSEL(GUDATA32_SEL, SEL_UPL) & 0xffffffff;
	tf->tf_rip = ((long)p->p_sigctx.ps_sigcode) & 0xffffffff;
	tf->tf_cs = GSEL(GUCODE32_SEL, SEL_UPL) & 0xffffffff;
	tf->tf_rflags &= ~PSL_CLEARSIG & 0xffffffff;
	tf->tf_rsp = (long)fp & 0xffffffff;
	tf->tf_ss = GSEL(GUDATA32_SEL, SEL_UPL) & 0xffffffff;

	/* Remember that we're now on the signal stack. */
	if (onstack)
		sas->ss_flags |= SS_ONSTACK;

	return;
}
Example 14
static int
linux32_restore_sigcontext(struct lwp *l, struct linux32_sigcontext *scp,
			register_t *retval)
{	
	struct trapframe *tf;
	struct proc *p = l->l_proc;
	struct sigaltstack *sas = &l->l_sigstk;
	struct pcb *pcb;
	sigset_t mask;
	ssize_t ss_gap;
	register_t fssel, gssel;

	/* Restore register context. */
	tf = l->l_md.md_regs;
	pcb = lwp_getpcb(l);
	DPRINTF(("sigreturn enter rsp=0x%lx rip=0x%lx\n", tf->tf_rsp,
		 tf->tf_rip));

	/*
	 * Check for security violations.
	 */
	if (((scp->sc_eflags ^ tf->tf_rflags) & PSL_USERSTATIC) != 0 ||
	    !USERMODE(scp->sc_cs, scp->sc_eflags))
		return EINVAL;

	if (scp->sc_fs != 0 && !VALID_USER_DSEL32(scp->sc_fs) &&
	    !(VALID_USER_FSEL32(scp->sc_fs) && pcb->pcb_fs != 0))
		return EINVAL;

	if (scp->sc_gs != 0 && !VALID_USER_DSEL32(scp->sc_gs) &&
	    !(VALID_USER_GSEL32(scp->sc_gs) && pcb->pcb_gs != 0))
		return EINVAL;

	if (scp->sc_es != 0 && !VALID_USER_DSEL32(scp->sc_es))
		return EINVAL;

	if (!VALID_USER_DSEL32(scp->sc_ds) ||
	    !VALID_USER_DSEL32(scp->sc_ss))
		return EINVAL;

	if (scp->sc_eip >= VM_MAXUSER_ADDRESS32)
		return EINVAL;

	gssel = (register_t)scp->sc_gs & 0xffff;
	fssel = (register_t)scp->sc_fs & 0xffff;
	cpu_fsgs_reload(l, fssel, gssel);
	tf->tf_es = (register_t)scp->sc_es & 0xffff;
	tf->tf_ds = (register_t)scp->sc_ds & 0xffff;
	tf->tf_rflags &= ~PSL_USER;
	tf->tf_rflags |= ((register_t)scp->sc_eflags & PSL_USER);
	tf->tf_rdi = (register_t)scp->sc_edi & 0xffffffff;
	tf->tf_rsi = (register_t)scp->sc_esi & 0xffffffff;
	tf->tf_rbp = (register_t)scp->sc_ebp & 0xffffffff;
	tf->tf_rbx = (register_t)scp->sc_ebx & 0xffffffff;
	tf->tf_rdx = (register_t)scp->sc_edx & 0xffffffff;
	tf->tf_rcx = (register_t)scp->sc_ecx & 0xffffffff;
	tf->tf_rax = (register_t)scp->sc_eax & 0xffffffff;
	tf->tf_rip = (register_t)scp->sc_eip & 0xffffffff;
	tf->tf_cs = (register_t)scp->sc_cs & 0xffff;
	tf->tf_rsp = (register_t)scp->sc_esp_at_signal & 0xffffffff;
	tf->tf_ss = (register_t)scp->sc_ss & 0xffff;

	mutex_enter(p->p_lock);

	/* Restore signal stack. */
	ss_gap = (ssize_t)
	    ((char *)NETBSD32IPTR64(scp->sc_esp_at_signal) 
	     - (char *)sas->ss_sp);
	if (ss_gap >= 0 && ss_gap < sas->ss_size)
		sas->ss_flags |= SS_ONSTACK;
	else
		sas->ss_flags &= ~SS_ONSTACK;

	/* Restore signal mask. */
	linux32_old_to_native_sigset(&mask, &scp->sc_mask);
	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);

	mutex_exit(p->p_lock);

	DPRINTF(("linux32_sigreturn: rip = 0x%lx, rsp = 0x%lx, flags = 0x%lx\n",
	    tf->tf_rip, tf->tf_rsp, tf->tf_rflags));
	return EJUSTRETURN;
}
Example 15
/*
 * Unlink zp from dl, and mark zp for deletion if this was the last link. Can
 * fail if zp is a mount point (EBUSY) or a non-empty directory (ENOTEMPTY).
 * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list.
 * If it's non-NULL, we use it to indicate whether the znode needs deletion,
 * and it's the caller's job to do it.
 */
int
zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
	boolean_t *unlinkedp)
{
	znode_t *dzp = dl->dl_dzp;
	zfs_sb_t *zsb = ZTOZSB(dzp);
	int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
	boolean_t unlinked = B_FALSE;
	sa_bulk_attr_t bulk[5];
	uint64_t mtime[2], ctime[2];
	int count = 0;
	int error;

#ifdef HAVE_DNLC
	dnlc_remove(ZTOI(dzp), dl->dl_name);
#endif /* HAVE_DNLC */

	if (!(flag & ZRENAMING)) {
		mutex_enter(&zp->z_lock);

		if (zp_is_dir && !zfs_dirempty(zp)) {
			mutex_exit(&zp->z_lock);
			return (SET_ERROR(ENOTEMPTY));
		}

		/*
		 * If we get here, we are going to try to remove the object.
		 * First try removing the name from the directory; if that
		 * fails, return the error.
		 */
		error = zfs_dropname(dl, zp, dzp, tx, flag);
		if (error != 0) {
			mutex_exit(&zp->z_lock);
			return (error);
		}

		if (zp->z_links <= zp_is_dir) {
			zfs_panic_recover("zfs: link count on %lu is %u, "
			    "should be at least %u", zp->z_id,
			    (int)zp->z_links, zp_is_dir + 1);
			zp->z_links = zp_is_dir + 1;
		}
		if (--zp->z_links == zp_is_dir) {
			zp->z_unlinked = B_TRUE;
			zp->z_links = 0;
			unlinked = B_TRUE;
		} else {
			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb),
			    NULL, &ctime, sizeof (ctime));
			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
			    NULL, &zp->z_pflags, sizeof (zp->z_pflags));
			zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime,
			    B_TRUE);
		}
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb),
		    NULL, &zp->z_links, sizeof (zp->z_links));
		error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
		count = 0;
		ASSERT(error == 0);
		mutex_exit(&zp->z_lock);
	} else {
		error = zfs_dropname(dl, zp, dzp, tx, flag);
		if (error != 0)
			return (error);
	}

	mutex_enter(&dzp->z_lock);
	dzp->z_size--;		/* one dirent removed */
	dzp->z_links -= zp_is_dir;	/* ".." link from zp */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb),
	    NULL, &dzp->z_links, sizeof (dzp->z_links));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb),
	    NULL, &dzp->z_size, sizeof (dzp->z_size));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb),
	    NULL, ctime, sizeof (ctime));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb),
	    NULL, mtime, sizeof (mtime));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
	    NULL, &dzp->z_pflags, sizeof (dzp->z_pflags));
	zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
	error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
	ASSERT(error == 0);
	mutex_exit(&dzp->z_lock);

	if (unlinkedp != NULL)
		*unlinkedp = unlinked;
	else if (unlinked)
		zfs_unlinked_add(zp, tx);

	return (0);
}
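A hedged sketch of the deferred path described in the header comment above: a caller that passes a non-NULL 'unlinkedp' must do the zfs_unlinked_add() step itself. The wrapper name and the placement of the deferred work are illustrative assumptions; zfs_link_destroy() and zfs_unlinked_add() come from the code above.

/*
 * Hypothetical caller sketch: defer the unlinked-list insertion until the
 * caller's remaining work inside the same tx has completed.
 */
static int
remove_entry_deferred(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
{
	boolean_t unlinked = B_FALSE;
	int error;

	error = zfs_link_destroy(dl, zp, tx, flag, &unlinked);
	if (error != 0)
		return (error);

	/* ... other updates that must precede the unlinked-list insert ... */

	if (unlinked)
		zfs_unlinked_add(zp, tx);

	return (0);
}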
int
cpu_setmcontext32(struct lwp *l, const mcontext32_t *mcp, unsigned int flags)
{
	struct trapframe *tf = l->l_md.md_regs;
	const __greg32_t *gr = mcp->__gregs;
	struct proc *p = l->l_proc;
	int error;

	/* Restore register context, if any. */
	if ((flags & _UC_CPU) != 0) {
		/*
		 * Check for security violations.
		 */
		error = cpu_mcontext32_validate(l, mcp);
		if (error != 0)
			return error;

		cpu_fsgs_reload(l, gr[_REG32_FS], gr[_REG32_GS]);
		tf->tf_es = gr[_REG32_ES];
		tf->tf_ds = gr[_REG32_DS];
		/* Only change the user-alterable part of eflags */
		tf->tf_rflags &= ~PSL_USER;
		tf->tf_rflags |= (gr[_REG32_EFL] & PSL_USER);
		tf->tf_rdi    = gr[_REG32_EDI];
		tf->tf_rsi    = gr[_REG32_ESI];
		tf->tf_rbp    = gr[_REG32_EBP];
		tf->tf_rbx    = gr[_REG32_EBX];
		tf->tf_rdx    = gr[_REG32_EDX];
		tf->tf_rcx    = gr[_REG32_ECX];
		tf->tf_rax    = gr[_REG32_EAX];
		tf->tf_rip    = gr[_REG32_EIP];
		tf->tf_cs     = gr[_REG32_CS];
		tf->tf_rsp    = gr[_REG32_UESP];
		tf->tf_ss     = gr[_REG32_SS];
	}

	if ((flags & _UC_TLSBASE) != 0)
		lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase);

	/* Restore floating point register context, if any. */
	if ((flags & _UC_FPU) != 0) {
		struct pcb *pcb = lwp_getpcb(l);

		/*
		 * If we were using the FPU, forget that we were.
		 */
		if (pcb->pcb_fpcpu != NULL) {
			fpusave_lwp(l, false);
		}
		memcpy(&pcb->pcb_savefpu.fp_fxsave, &mcp->__fpregs,
		    sizeof (pcb->pcb_savefpu.fp_fxsave));
		/* If not set already. */
		l->l_md.md_flags |= MDL_USEDFPU;
	}

	mutex_enter(p->p_lock);
	if (flags & _UC_SETSTACK)
		l->l_sigstk.ss_flags |= SS_ONSTACK;
	if (flags & _UC_CLRSTACK)
		l->l_sigstk.ss_flags &= ~SS_ONSTACK;
	mutex_exit(p->p_lock);

	return (0);
}
Example 17
/* ARGSUSED */
int
mfs_start(struct mount *mp, int flags)
{
	struct vnode *vp;
	struct mfsnode *mfsp;
	struct proc *p;
	struct buf *bp;
	void *base;
	int sleepreturn = 0, refcnt, error;
	ksiginfoq_t kq;

	/*
	 * Ensure that file system is still mounted when getting mfsnode.
	 * Add a reference to the mfsnode to prevent it disappearing in
	 * this routine.
	 */
	if ((error = vfs_busy(mp, NULL)) != 0)
		return error;
	vp = VFSTOUFS(mp)->um_devvp;
	mfsp = VTOMFS(vp);
	mutex_enter(&mfs_lock);
	mfsp->mfs_refcnt++;
	mutex_exit(&mfs_lock);
	vfs_unbusy(mp, false, NULL);

	base = mfsp->mfs_baseoff;
	mutex_enter(&mfs_lock);
	while (mfsp->mfs_shutdown != 1) {
		while ((bp = bufq_get(mfsp->mfs_buflist)) != NULL) {
			mutex_exit(&mfs_lock);
			mfs_doio(bp, base);
			mutex_enter(&mfs_lock);
		}
		/*
		 * If a non-ignored signal is received, try to unmount.
		 * If that fails, or the filesystem is already in the
		 * process of being unmounted, clear the signal (it has been
		 * "processed"), otherwise we will loop here, as tsleep
		 * will always return EINTR/ERESTART.
		 */
		if (sleepreturn != 0) {
			mutex_exit(&mfs_lock);
			if (dounmount(mp, 0, curlwp) != 0) {
				p = curproc;
				ksiginfo_queue_init(&kq);
				mutex_enter(p->p_lock);
				sigclearall(p, NULL, &kq);
				mutex_exit(p->p_lock);
				ksiginfo_queue_drain(&kq);
			}
			sleepreturn = 0;
			mutex_enter(&mfs_lock);
			continue;
		}

		sleepreturn = cv_wait_sig(&mfsp->mfs_cv, &mfs_lock);
	}
	KASSERT(bufq_peek(mfsp->mfs_buflist) == NULL);
	refcnt = --mfsp->mfs_refcnt;
	mutex_exit(&mfs_lock);
	if (refcnt == 0) {
		bufq_free(mfsp->mfs_buflist);
		cv_destroy(&mfsp->mfs_cv);
		kmem_free(mfsp, sizeof(*mfsp));
	}
	return (sleepreturn);
}
Example 18
/*ARGSUSED*/
int
dump_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, int *rvalp)
{
    uint64_t size;
    uint64_t dumpsize_in_pages;
    int error = 0;
    char *pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
    char uuidbuf[36 + 1];
    size_t len;
    vnode_t *vp;

    switch (cmd) {
    case DIOCGETDUMPSIZE:
        if (dump_conflags & DUMP_ALL)
            size = ptob((uint64_t)physmem) / DUMP_COMPRESS_RATIO;
        else {
            /*
             * We can't give a good answer for the DUMP_CURPROC
             * because we won't know which process to use until it
             * causes a panic.  We'll therefore punt and give the
             * caller the size for the kernel.
             *
             * This kernel size equation takes care of the
             * boot time kernel footprint and also accounts
             * for availrmem changes due to user explicit locking.
             * Refer to common/vm/vm_page.c for an explanation
             * of these counters.
             */
            dumpsize_in_pages = (physinstalled - obp_pages -
                                 availrmem -
                                 anon_segkp_pages_locked -
                                 k_anoninfo.ani_mem_resv -
                                 pages_locked -
                                 pages_claimed -
                                 pages_useclaim);

            /*
             * Protect against vm vagaries.
             */
            if (dumpsize_in_pages > (uint64_t)physmem)
                dumpsize_in_pages = (uint64_t)physmem;

            size = ptob(dumpsize_in_pages) / DUMP_COMPRESS_RATIO;
        }
        if (copyout(&size, (void *)arg, sizeof (size)) < 0)
            error = EFAULT;
        break;

    case DIOCGETCONF:
        mutex_enter(&dump_lock);
        *rvalp = dump_conflags;
        if (dumpvp && !(dumpvp->v_flag & VISSWAP))
            *rvalp |= DUMP_EXCL;
        mutex_exit(&dump_lock);
        break;

    case DIOCSETCONF:
        mutex_enter(&dump_lock);
        if (arg == DUMP_KERNEL || arg == DUMP_ALL ||
                arg == DUMP_CURPROC)
            dump_conflags = arg;
        else
            error = EINVAL;
        mutex_exit(&dump_lock);
        break;

    case DIOCGETDEV:
        mutex_enter(&dump_lock);
        if (dumppath == NULL) {
            mutex_exit(&dump_lock);
            error = ENODEV;
            break;
        }
        (void) strcpy(pathbuf, dumppath);
        mutex_exit(&dump_lock);
        error = copyoutstr(pathbuf, (void *)arg, MAXPATHLEN, NULL);
        break;

    case DIOCSETDEV:
    case DIOCTRYDEV:
        if ((error = copyinstr((char *)arg, pathbuf, MAXPATHLEN,
                               NULL)) != 0 || (error = lookupname(pathbuf, UIO_SYSSPACE,
                                               FOLLOW, NULLVPP, &vp)) != 0)
            break;
        mutex_enter(&dump_lock);
        if (vp->v_type == VBLK)
            error = dumpinit(vp, pathbuf, cmd == DIOCTRYDEV);
        else
            error = ENOTBLK;
        mutex_exit(&dump_lock);
        VN_RELE(vp);
        break;

    case DIOCDUMP:
        mutex_enter(&dump_lock);
        if (dumpvp == NULL)
            error = ENODEV;
        else if (dumpvp->v_flag & VISSWAP)
            error = EBUSY;
        else
            dumpsys();
        mutex_exit(&dump_lock);
        break;

    case DIOCSETUUID:
        if ((error = copyinstr((char *)arg, uuidbuf, sizeof (uuidbuf),
                               &len)) != 0)
            break;

        if (len != 37) {
            error = EINVAL;
            break;
        }

        error = dump_set_uuid(uuidbuf);
        break;

    case DIOCGETUUID:
        error = copyoutstr(dump_get_uuid(), (void *)arg, 37, NULL);
        break;

    case DIOCRMDEV:
        mutex_enter(&dump_lock);
        if (dumpvp != NULL)
            dumpfini();
        mutex_exit(&dump_lock);
        break;

    default:
        error = ENXIO;
    }

    kmem_free(pathbuf, MAXPATHLEN);
    return (error);
}
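A hedged userland sketch of the DIOCGETDUMPSIZE path handled above: open the dump device and read back the estimated size. The device path "/dev/dump" and the header locations are assumptions made for illustration only.

/*
 * Hypothetical userland sketch (not part of the driver source): query the
 * estimated dump size.  Paths and headers below are assumptions.
 */
#include <sys/types.h>
#include <sys/dumpadm.h>	/* assumed home of DIOCGETDUMPSIZE */
#include <fcntl.h>
#include <stdio.h>
#include <stropts.h>		/* ioctl() */
#include <unistd.h>		/* close() */

int
main(void)
{
	uint64_t size;
	int fd = open("/dev/dump", O_RDONLY);

	if (fd == -1) {
		perror("open /dev/dump");
		return (1);
	}
	if (ioctl(fd, DIOCGETDUMPSIZE, &size) == -1) {
		perror("DIOCGETDUMPSIZE");
		(void) close(fd);
		return (1);
	}
	(void) printf("estimated dump size: %llu bytes\n",
	    (unsigned long long)size);
	(void) close(fd);
	return (0);
}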
Example 19
/*
 * Read the comments inside of page_lock_es() carefully.
 *
 * SE_EXCL callers specifying es == SE_EXCL_WANTED will cause the
 * SE_EWANTED bit of p_selock to be set when the lock cannot be obtained.
 * This is used by threads subject to reader-starvation (eg. memory delete).
 *
 * When a thread using SE_EXCL_WANTED does not obtain the SE_EXCL lock,
 * it is expected that it will retry at a later time.  Threads that will
 * not retry the lock *must* call page_lock_clr_exclwanted to clear the
 * SE_EWANTED bit.  (When a thread using SE_EXCL_WANTED obtains the lock,
 * the bit is cleared.)
 */
int
page_try_reclaim_lock(page_t *pp, se_t se, int es)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);

	old = pp->p_selock;

	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));

	if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
		mutex_exit(pse);
		VM_STAT_ADD(page_trylock_failed);
		return (0);
	}

	if (se == SE_SHARED && es == 1 && old == 0) {
		se = SE_EXCL;
	}

	if (se == SE_SHARED) {
		if (!PP_ISFREE(pp)) {
			if (old >= 0) {
				/*
				 * Readers are not allowed when excl wanted
				 */
				if ((old & SE_EWANTED) == 0) {
					pp->p_selock = old + SE_READER;
					mutex_exit(pse);
					return (1);
				}
			}
			mutex_exit(pse);
			return (0);
		}
		/*
		 * The page is free, so we really want SE_EXCL (below)
		 */
		VM_STAT_ADD(page_try_reclaim_upgrade);
	}

	/*
	 * The caller wants a writer lock.  We try for it only if
	 * SE_EWANTED is not set, or if the caller specified
	 * SE_EXCL_WANTED.
	 */
	if (!(old & SE_EWANTED) || (es & SE_EXCL_WANTED)) {
		if ((old & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears out our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	}
	if (es & SE_EXCL_WANTED) {
		/* page is locked, set the SE_EWANTED bit */
		pp->p_selock |= SE_EWANTED;
	}
	mutex_exit(pse);
	return (0);
}
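A hedged sketch of the non-retrying SE_EXCL_WANTED caller described in the comment above: if the trylock fails and the thread will not retry, it must clear SE_EWANTED via page_lock_clr_exclwanted(), which the comment names. page_unlock() is assumed as the release primitive.

/*
 * Hypothetical one-shot caller: on failure we will not retry, so clear
 * the SE_EWANTED bit we may have set, as required by the comment above.
 */
static void
one_shot_excl_attempt(page_t *pp)
{
	if (page_try_reclaim_lock(pp, SE_EXCL, SE_EXCL_WANTED)) {
		/* lock obtained; SE_EWANTED was cleared for us */

		/* ... exclusive work on pp ... */

		page_unlock(pp);	/* assumed release primitive */
	} else {
		page_lock_clr_exclwanted(pp);
	}
}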
Example 20
/*
 * dcopy_cmd_poll()
 */
int
dcopy_cmd_poll(dcopy_cmd_t cmd, int flags)
{
	dcopy_handle_t channel;
	dcopy_cmd_priv_t priv;
	int e;


	priv = cmd->dp_private;
	channel = priv->pr_channel;

	/*
	 * if the caller is trying to block, they needed to post the
	 * command with DCOPY_CMD_INTR set.
	 */
	if ((flags & DCOPY_POLL_BLOCK) && !(cmd->dp_flags & DCOPY_CMD_INTR)) {
		return (DCOPY_FAILURE);
	}

	atomic_inc_64(&channel->ch_stat.cs_cmd_poll.value.ui64);

repoll:
	e = channel->ch_cb->cb_cmd_poll(channel->ch_channel_private, cmd);
	if (e == DCOPY_PENDING) {
		/*
		 * The command is still active; if the caller asked to
		 * block, wait here until it completes.
		 */
		if (flags & DCOPY_POLL_BLOCK) {

			/*
			 * if we haven't initialized the state, do it now. A
			 * command can be re-used, so it's possible it's
			 * already been initialized.
			 */
			if (!priv->pr_block_init) {
				priv->pr_block_init = B_TRUE;
				mutex_init(&priv->pr_mutex, NULL, MUTEX_DRIVER,
				    NULL);
				cv_init(&priv->pr_cv, NULL, CV_DRIVER, NULL);
				priv->pr_cmd = cmd;
			}

			/* push it on the list for blocking commands */
			priv->pr_wait = B_TRUE;
			dcopy_list_push(&channel->ch_poll_list, priv);

			mutex_enter(&priv->pr_mutex);
			/*
			 * it's possible we already cleared pr_wait before we
			 * grabbed the mutex.
			 */
			if (priv->pr_wait) {
				cv_wait(&priv->pr_cv, &priv->pr_mutex);
			}
			mutex_exit(&priv->pr_mutex);

			/*
			 * the command has completed, go back and poll so we
			 * get the status.
			 */
			goto repoll;
		}
	}

	return (e);
}
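
A consumer that intends to block in dcopy_cmd_poll() therefore has to mark the command interrupt-capable before posting it. The outline below is an assumption-laden sketch: dcopy_cmd_post() is taken to exist with the obvious signature (it is not part of this example), and command allocation and setup are elided.

/*
 * Hedged outline of a blocking dcopy consumer.  dcopy_cmd_post() is an
 * assumed interface; only dcopy_cmd_poll() appears in the example above.
 */
static int
post_and_wait(dcopy_cmd_t cmd)
{
	int e;

	/* Blocking polls are rejected unless the command can interrupt. */
	cmd->dp_flags |= DCOPY_CMD_INTR;

	e = dcopy_cmd_post(cmd);	/* assumed interface */
	if (e != DCOPY_SUCCESS)
		return (e);

	/* Sleeps on pr_cv until the completion path wakes us. */
	return (dcopy_cmd_poll(cmd, DCOPY_POLL_BLOCK));
}
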
Esempio n. 21
0
static void
auvia_attach(device_t parent, device_t self, void *aux)
{
	struct pci_attach_args *pa;
	struct auvia_softc *sc;
	const char *intrstr;
	pci_chipset_tag_t pc;
	pcitag_t pt;
	pci_intr_handle_t ih;
	pcireg_t pr;
	int r;
	const char *revnum;	/* VT823xx revision number */
	char intrbuf[PCI_INTRSTR_LEN];

	pa = aux;
	sc = device_private(self);
	sc->sc_dev = self;
	intrstr = NULL;
	pc = pa->pa_pc;
	pt = pa->pa_tag;
	revnum = NULL;

	aprint_naive(": Audio controller\n");

	sc->sc_play.sc_base = AUVIA_PLAY_BASE;
	sc->sc_record.sc_base = AUVIA_RECORD_BASE;
	if (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_VIATECH_VT8233_AC97) {
		sc->sc_flags |= AUVIA_FLAGS_VT8233;
		sc->sc_play.sc_base = VIA8233_MP_BASE;
		sc->sc_record.sc_base = VIA8233_WR_BASE;
	}

	if (pci_mapreg_map(pa, 0x10, PCI_MAPREG_TYPE_IO, 0, &sc->sc_iot,
		&sc->sc_ioh, NULL, &sc->sc_iosize)) {
		aprint_error(": can't map i/o space\n");
		return;
	}

	sc->sc_dmat = pa->pa_dmat;
	sc->sc_pc = pc;
	sc->sc_pt = pt;

	r = PCI_REVISION(pa->pa_class);
	if (sc->sc_flags & AUVIA_FLAGS_VT8233) {
		snprintf(sc->sc_revision, sizeof(sc->sc_revision), "0x%02X", r);
		switch(r) {
		case VIA_REV_8233PRE:
			/* same as 8233, but should not be in the market */
			revnum = "3-Pre";
			break;
		case VIA_REV_8233C:
			/* 2 rec, 4 pb, 1 multi-pb */
			revnum = "3C";
			break;
		case VIA_REV_8233:
			/* 2 rec, 4 pb, 1 multi-pb, spdif */
			revnum = "3";
			break;
		case VIA_REV_8233A:
			/* 1 rec, 1 multi-pb, spdif */
			revnum = "3A";
			break;
		default:
			break;
		}
		if (r >= VIA_REV_8237)
			revnum = "7";
		else if (r >= VIA_REV_8235) /* 2 rec, 4 pb, 1 multi-pb, spdif */
			revnum = "5";
		aprint_normal(": VIA Technologies VT823%s AC'97 Audio "
		    "(rev %s)\n", revnum, sc->sc_revision);
	} else {
		sc->sc_revision[1] = '\0';
		if (r == 0x20) {
			sc->sc_revision[0] = 'H';
		} else if ((r >= 0x10) && (r <= 0x14)) {
			sc->sc_revision[0] = 'A' + (r - 0x10);
		} else {
			snprintf(sc->sc_revision, sizeof(sc->sc_revision),
			    "0x%02X", r);
		}

		aprint_normal(": VIA Technologies VT82C686A AC'97 Audio "
		    "(rev %s)\n", sc->sc_revision);
	}

	if (pci_intr_map(pa, &ih)) {
		aprint_error(": couldn't map interrupt\n");
		bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_iosize);
		return;
	}
	intrstr = pci_intr_string(pc, ih, intrbuf, sizeof(intrbuf));

	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&sc->sc_intr_lock, MUTEX_DEFAULT, IPL_AUDIO);

	sc->sc_ih = pci_intr_establish(pc, ih, IPL_AUDIO, auvia_intr, sc);
	if (sc->sc_ih == NULL) {
		aprint_error_dev(sc->sc_dev, "couldn't establish interrupt");
		if (intrstr != NULL)
			aprint_error(" at %s", intrstr);
		aprint_error("\n");
		bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_iosize);
		mutex_destroy(&sc->sc_lock);
		mutex_destroy(&sc->sc_intr_lock);
		return;
	}

	aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr);

	/* disable SBPro compat & others */
	pr = pci_conf_read(pc, pt, AUVIA_PCICONF_JUNK);

	pr &= ~AUVIA_PCICONF_ENABLES; /* clear compat function enables */
	/* XXX what to do about MIDI, FM, joystick? */

	pr |= (AUVIA_PCICONF_ACLINKENAB | AUVIA_PCICONF_ACNOTRST
		| AUVIA_PCICONF_ACVSR | AUVIA_PCICONF_ACSGD);

	pr &= ~(AUVIA_PCICONF_ACFM | AUVIA_PCICONF_ACSB);

	pci_conf_write(pc, pt, AUVIA_PCICONF_JUNK, pr);

	sc->host_if.arg = sc;
	sc->host_if.attach = auvia_attach_codec;
	sc->host_if.read = auvia_read_codec;
	sc->host_if.write = auvia_write_codec;
	sc->host_if.reset = auvia_reset_codec;
	sc->host_if.spdif_event = auvia_spdif_event;

	if ((r = ac97_attach(&sc->host_if, self, &sc->sc_lock)) != 0) {
		aprint_error_dev(sc->sc_dev, "can't attach codec (error 0x%X)\n", r);
		pci_intr_disestablish(pc, sc->sc_ih);
		bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_iosize);
		mutex_destroy(&sc->sc_lock);
		mutex_destroy(&sc->sc_intr_lock);
		return;
	}

	/* setup audio_format */
	memcpy(sc->sc_formats, auvia_formats, sizeof(auvia_formats));
	mutex_enter(&sc->sc_lock);
	if (sc->sc_play.sc_base != VIA8233_MP_BASE || !AC97_IS_4CH(sc->codec_if)) {
		AUFMT_INVALIDATE(&sc->sc_formats[AUVIA_FORMATS_4CH_8]);
		AUFMT_INVALIDATE(&sc->sc_formats[AUVIA_FORMATS_4CH_16]);
	}
	if (sc->sc_play.sc_base != VIA8233_MP_BASE || !AC97_IS_6CH(sc->codec_if)) {
		AUFMT_INVALIDATE(&sc->sc_formats[AUVIA_FORMATS_6CH_8]);
		AUFMT_INVALIDATE(&sc->sc_formats[AUVIA_FORMATS_6CH_16]);
	}
	if (AC97_IS_FIXED_RATE(sc->codec_if)) {
		for (r = 0; r < AUVIA_NFORMATS; r++) {
			sc->sc_formats[r].frequency_type = 1;
			sc->sc_formats[r].frequency[0] = 48000;
		}
	}
	mutex_exit(&sc->sc_lock);

	if (0 != auconv_create_encodings(sc->sc_formats, AUVIA_NFORMATS,
					 &sc->sc_encodings)) {
		mutex_enter(&sc->sc_lock);
		sc->codec_if->vtbl->detach(sc->codec_if);
		mutex_exit(&sc->sc_lock);
		pci_intr_disestablish(pc, sc->sc_ih);
		bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_iosize);
		mutex_destroy(&sc->sc_lock);
		mutex_destroy(&sc->sc_intr_lock);
		aprint_error_dev(sc->sc_dev, "can't create encodings\n");
		return;
	}
	if (0 != auconv_create_encodings(auvia_spdif_formats,
	    AUVIA_SPDIF_NFORMATS, &sc->sc_spdif_encodings)) {
		mutex_enter(&sc->sc_lock);
		sc->codec_if->vtbl->detach(sc->codec_if);
		mutex_exit(&sc->sc_lock);
		pci_intr_disestablish(pc, sc->sc_ih);
		bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_iosize);
		mutex_destroy(&sc->sc_lock);
		mutex_destroy(&sc->sc_intr_lock);
		aprint_error_dev(sc->sc_dev, "can't create spdif encodings\n");
		return;
	}

	if (!pmf_device_register(self, NULL, auvia_resume))
		aprint_error_dev(self, "couldn't establish power handler\n");

	audio_attach_mi(&auvia_hw_if, sc, sc->sc_dev);
	mutex_enter(&sc->sc_lock);
	sc->codec_if->vtbl->unlock(sc->codec_if);
	mutex_exit(&sc->sc_lock);
	return;
}
Esempio n. 22
0
/*
 * dcopy_device_register()
 */
int
dcopy_device_register(void *device_private, dcopy_device_info_t *info,
    dcopy_device_handle_t *handle)
{
	struct dcopy_channel_s *channel;
	struct dcopy_device_s *device;
	int e;
	int i;


	/* initialize the per device state */
	device = kmem_zalloc(sizeof (*device), KM_SLEEP);
	device->dc_device_private = device_private;
	device->dc_info = *info;
	device->dc_removing_cnt = 0;
	device->dc_cb = info->di_cb;

	/*
	 * we have a per-device channel list so we can remove a device in the
	 * future.
	 */
	e = dcopy_list_init(&device->dc_devchan_list,
	    sizeof (struct dcopy_channel_s),
	    offsetof(struct dcopy_channel_s, ch_devchan_list_node));
	if (e != DCOPY_SUCCESS) {
		goto registerfail_devchan;
	}

	/*
	 * allocate state for each channel, allocate the channel, and then add
	 * the device's DMA channels to the device's channel list.
	 */
	for (i = 0; i < info->di_num_dma; i++) {
		channel = kmem_zalloc(sizeof (*channel), KM_SLEEP);
		channel->ch_device = device;
		channel->ch_removing = B_FALSE;
		channel->ch_ref_cnt = 0;
		channel->ch_cb = info->di_cb;

		e = info->di_cb->cb_channel_alloc(device_private, channel,
		    DCOPY_SLEEP, dcopy_channel_size, &channel->ch_info,
		    &channel->ch_channel_private);
		if (e != DCOPY_SUCCESS) {
			kmem_free(channel, sizeof (*channel));
			goto registerfail_alloc;
		}

		e = dcopy_stats_init(channel);
		if (e != DCOPY_SUCCESS) {
			info->di_cb->cb_channel_free(
			    &channel->ch_channel_private);
			kmem_free(channel, sizeof (*channel));
			goto registerfail_alloc;
		}

		e = dcopy_list_init(&channel->ch_poll_list,
		    sizeof (struct dcopy_cmd_priv_s),
		    offsetof(struct dcopy_cmd_priv_s, pr_poll_list_node));
		if (e != DCOPY_SUCCESS) {
			dcopy_stats_fini(channel);
			info->di_cb->cb_channel_free(
			    &channel->ch_channel_private);
			kmem_free(channel, sizeof (*channel));
			goto registerfail_alloc;
		}

		dcopy_list_push(&device->dc_devchan_list, channel);
	}

	/* add the device to device list */
	dcopy_list_push(&dcopy_statep->d_device_list, device);

	/*
	 * add the device's dma channels to the global channel list (where
	 * dcopy_alloc's come from)
	 */
	mutex_enter(&dcopy_statep->d_globalchan_list.dl_mutex);
	mutex_enter(&dcopy_statep->d_device_list.dl_mutex);
	channel = list_head(&device->dc_devchan_list.dl_list);
	while (channel != NULL) {
		list_insert_tail(&dcopy_statep->d_globalchan_list.dl_list,
		    channel);
		dcopy_statep->d_globalchan_list.dl_cnt++;
		channel = list_next(&device->dc_devchan_list.dl_list, channel);
	}
	mutex_exit(&dcopy_statep->d_device_list.dl_mutex);
	mutex_exit(&dcopy_statep->d_globalchan_list.dl_mutex);

	*handle = device;

	/* last call-back into kernel for dcopy KAPI enabled */
	uioa_dcopy_enable();

	return (DCOPY_SUCCESS);

registerfail_alloc:
	channel = list_head(&device->dc_devchan_list.dl_list);
	while (channel != NULL) {
		/* remove from the list */
		channel = dcopy_list_pop(&device->dc_devchan_list);
		ASSERT(channel != NULL);

		dcopy_list_fini(&channel->ch_poll_list);
		dcopy_stats_fini(channel);
		info->di_cb->cb_channel_free(&channel->ch_channel_private);
		kmem_free(channel, sizeof (*channel));
	}

	dcopy_list_fini(&device->dc_devchan_list);
registerfail_devchan:
	kmem_free(device, sizeof (*device));

	return (DCOPY_FAILURE);
}
Esempio n. 23
0
/* ARGSUSED */
static int
notify_ioctl(dev_t dev, int icmd, void *ioctl_in, int mode, IOLOCK *lockp)
{
	int			cmd;
	pid_t			pid;
	md_event_queue_t	*event_queue;
	md_event_t		*event;
	cred_t			*credp;
	char			*q_name;
	int			err = 0;
	size_t			sz = 0;
	md_event_ioctl_t	*ioctl;

	sz = sizeof (*ioctl);
	ioctl = kmem_zalloc(sz, KM_SLEEP);

	if (ddi_copyin(ioctl_in, (void *)ioctl, sz, mode)) {
		err = EFAULT;
		goto out;
	}

	if (ioctl->mdn_rev != MD_NOTIFY_REVISION) {
		err = EINVAL;
		goto out;
	}
	if (ioctl->mdn_magic != MD_EVENT_ID) {
		err = EINVAL;
		goto out;
	}

	pid = md_getpid();
	cmd = ioctl->mdn_cmd;
	q_name = ioctl->mdn_name;

	if (((cmd != EQ_OFF) && (cmd != EQ_ON)) && (md_reap >= md_reap_count))
		md_reaper();

	if ((cmd != EQ_ON) && (cmd != EQ_PUT)) {
		mutex_enter(&md_eventq_mx);
		if ((event_queue = md_find_event_queue(q_name, 0)) == NULL) {
			mutex_exit(&md_eventq_mx);
			(void) notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			err = ENOENT;
			goto out;
		}
	}

	switch (cmd) {
	    case EQ_ON:

		md_reaper();

		mutex_enter(&md_eventq_mx);
		if (md_find_event_queue(q_name, 0) != NULL) {
			mutex_exit(&md_eventq_mx);
			err = EEXIST;
			break;
		}

		/* allocate and initialize queue head */
		event_queue = (md_event_queue_t *)
		    kmem_alloc(sizeof (md_event_queue_t), KM_NOSLEEP);
		if (event_queue == NULL) {
			mutex_exit(&md_eventq_mx);
			err = ENOMEM;
			break;
		}

		cv_init(&event_queue->mdn_cv, NULL, CV_DEFAULT, NULL);

		event_queue->mdn_flags = 0;
		event_queue->mdn_pid = pid;
		event_queue->mdn_proc = md_getproc();
		event_queue->mdn_size = 0;
		event_queue->mdn_front = NULL;
		event_queue->mdn_tail = NULL;
		event_queue->mdn_waiting = 0;
		event_queue->mdn_nextq = NULL;
		credp = ddi_get_cred();
		event_queue->mdn_uid = crgetuid(credp);
		bcopy(q_name, event_queue->mdn_name,
		    MD_NOTIFY_NAME_SIZE);
		if (ioctl->mdn_flags & EQ_Q_PERM)
			event_queue->mdn_flags |= MD_EVENT_QUEUE_PERM;

		/* link into the list of event queues */
		if (md_event_queue != NULL)
			event_queue->mdn_nextq = md_event_queue;
		md_event_queue = event_queue;
		mutex_exit(&md_eventq_mx);
		err = 0;
		break;

	    case EQ_OFF:

		if (md_event_queue == NULL) {
			/* md_eventq_mx is held here; don't leak it or ioctl */
			mutex_exit(&md_eventq_mx);
			err = ENOENT;
			break;
		}

		event_queue->mdn_flags = MD_EVENT_QUEUE_DESTROY;
		event_queue->mdn_pid = 0;
		event_queue->mdn_proc = NULL;

		if (event_queue->mdn_waiting != 0)
			cv_broadcast(&event_queue->mdn_cv);

		/*
		 * force the reaper to delete this when it has no process
		 * waiting on it.
		 */
		mutex_exit(&md_eventq_mx);
		md_reaper();
		err = 0;
		break;

	    case EQ_GET_NOWAIT:
	    case EQ_GET_WAIT:
		if (cmd == EQ_GET_WAIT) {
			err = md_wait_for_event(event_queue, ioctl_in,
			    ioctl, sz, mode, lockp);
			if (err == EINTR)
				goto out;
		}
		ASSERT(MUTEX_HELD(&md_eventq_mx));
		if (event_queue->mdn_flags &
		    (MD_EVENT_QUEUE_INVALID | MD_EVENT_QUEUE_FULL)) {
			event_queue->mdn_flags &=
			    ~(MD_EVENT_QUEUE_INVALID | MD_EVENT_QUEUE_FULL);
			mutex_exit(&md_eventq_mx);
			err = notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			ioctl->mdn_event = EQ_NOTIFY_LOST;
			err = ddi_copyout((void *)ioctl, ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
			goto out;
		}
		if (event_queue->mdn_front != NULL) {
			event = event_queue->mdn_front;
			event_queue->mdn_front = event->mdn_next;
			event_queue->mdn_size--;
			if (event_queue->mdn_front == NULL)
				event_queue->mdn_tail = NULL;
			mutex_exit(&md_eventq_mx);
			ioctl->mdn_tag = event->mdn_tag;
			ioctl->mdn_set = event->mdn_set;
			ioctl->mdn_dev = event->mdn_dev;
			ioctl->mdn_event = event->mdn_event;
			ioctl->mdn_user = event->mdn_user;
			ioctl->mdn_time.tv_sec = event->mdn_time.tv_sec;
			ioctl->mdn_time.tv_usec =
					event->mdn_time.tv_usec;
			kmem_free(event, sizeof (md_event_t));
			err = ddi_copyout((void *)ioctl, ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
			goto out;
		} else { /* no elements on queue */
			mutex_exit(&md_eventq_mx);
			err = notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
		}

		if (cmd == EQ_GET_NOWAIT)
			err = EAGAIN;
		goto out;

	    case EQ_PUT:

		if (!md_event_queue) {
			err = ENOENT;
			break;
		}
		md_put_event(ioctl->mdn_tag,
			ioctl->mdn_set, ioctl->mdn_dev,
			ioctl->mdn_event, ioctl->mdn_user);
		err = 0;
		goto out;

	    default:
		err = EINVAL;
		goto out;
	}

out:
	kmem_free(ioctl, sz);
	return (err);
}
Esempio n. 24
0
/*
 * Lock a directory entry.  A dirlock on <dzp, name> protects that name
 * in dzp's directory zap object.  As long as you hold a dirlock, you can
 * assume two things: (1) dzp cannot be reaped, and (2) no other thread
 * can change the zap entry for (i.e. link or unlink) this name.
 *
 * Input arguments:
 *	dzp	- znode for directory
 *	name	- name of entry to lock
 *	flag	- ZNEW: if the entry already exists, fail with EEXIST.
 *		  ZEXISTS: if the entry does not exist, fail with ENOENT.
 *		  ZSHARED: allow concurrent access with other ZSHARED callers.
 *		  ZXATTR: we want dzp's xattr directory
 *		  ZCILOOK: On a mixed sensitivity file system,
 *			   this lookup should be case-insensitive.
 *		  ZCIEXACT: On a purely case-insensitive file system,
 *			    this lookup should be case-sensitive.
 *		  ZRENAMING: we are locking for renaming, force narrow locks
 *		  ZHAVELOCK: Don't grab the z_name_lock for this call. The
 *			     current thread already holds it.
 *
 * Output arguments:
 *	zpp	- pointer to the znode for the entry (NULL if there isn't one)
 *	dlpp	- pointer to the dirlock for this entry (NULL on error)
 *      direntflags - (case-insensitive lookup only)
 *		flags if multiple case-sensitive matches exist in directory
 *      realpnp     - (case-insensitive lookup only)
 *		actual name matched within the directory
 *
 * Return value: 0 on success or errno on failure.
 *
 * NOTE: Always checks for, and rejects, '.' and '..'.
 * NOTE: For case-insensitive file systems we take wide locks (see below),
 *	 but return znode pointers to a single match.
 */
int
zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
    int flag, int *direntflags, pathname_t *realpnp)
{
	zfs_sb_t	*zsb = ZTOZSB(dzp);
	zfs_dirlock_t	*dl;
	boolean_t	update;
	boolean_t	exact;
	uint64_t	zoid;
#ifdef HAVE_DNLC
	vnode_t		*vp = NULL;
#endif /* HAVE_DNLC */
	int		error = 0;
	int		cmpflags;

	*zpp = NULL;
	*dlpp = NULL;

	/*
	 * Verify that we are not trying to lock '.', '..', or '.zfs'
	 */
	if ((name[0] == '.' &&
	    (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) ||
	    (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0))
		return (SET_ERROR(EEXIST));

	/*
	 * Case sensitivity and normalization preferences are set when
	 * the file system is created.  These are stored in the
	 * zsb->z_case and zsb->z_norm fields.  These choices
	 * affect what vnodes can be cached in the DNLC, how we
	 * perform zap lookups, and the "width" of our dirlocks.
	 *
	 * A normal dirlock locks a single name.  Note that with
	 * normalization a name can be composed multiple ways, but
	 * when normalized, these names all compare equal.  A wide
	 * dirlock locks multiple names.  We need these when the file
	 * system supports mixed-mode access.  It is sometimes necessary
	 * to lock all case permutations of a file name at once so that
	 * simultaneous case-insensitive and case-sensitive accesses
	 * behave as rationally as possible.
	 */

	/*
	 * Decide if exact matches should be requested when performing
	 * a zap lookup on file systems supporting case-insensitive
	 * access.
	 */
	exact =
	    ((zsb->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) ||
	    ((zsb->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK));

	/*
	 * Only look in or update the DNLC if we are looking for the
	 * name on a file system that does not require normalization
	 * or case folding.  We can also look there if we happen to be
	 * on a non-normalizing, mixed sensitivity file system IF we
	 * are looking for the exact name.
	 *
	 * Maybe we could add a TO-UPPERed version of the name to the
	 * DNLC in the CI-only case as a performance improvement?
	 */
	update = !zsb->z_norm ||
	    ((zsb->z_case == ZFS_CASE_MIXED) &&
	    !(zsb->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));

	/*
	 * ZRENAMING indicates we are in a situation where we should
	 * take narrow locks regardless of the file system's
	 * preferences for normalizing and case folding.  This will
	 * prevent us deadlocking trying to grab the same wide lock
	 * twice if the two names happen to be case-insensitive
	 * matches.
	 */
	if (flag & ZRENAMING)
		cmpflags = 0;
	else
		cmpflags = zsb->z_norm;

	/*
	 * Wait until there are no locks on this name.
	 *
	 * Don't grab the lock if it is already held.  However, we cannot
	 * have both ZSHARED and ZHAVELOCK together.
	 */
	ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK));
	if (!(flag & ZHAVELOCK))
		rw_enter(&dzp->z_name_lock, RW_READER);

	mutex_enter(&dzp->z_lock);
	for (;;) {
		if (dzp->z_unlinked) {
			mutex_exit(&dzp->z_lock);
			if (!(flag & ZHAVELOCK))
				rw_exit(&dzp->z_name_lock);
			return (SET_ERROR(ENOENT));
		}
		for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) {
			if ((u8_strcmp(name, dl->dl_name, 0, cmpflags,
			    U8_UNICODE_LATEST, &error) == 0) || error != 0)
				break;
		}
		if (error != 0) {
			mutex_exit(&dzp->z_lock);
			if (!(flag & ZHAVELOCK))
				rw_exit(&dzp->z_name_lock);
			return (SET_ERROR(ENOENT));
		}
		if (dl == NULL)	{
			/*
			 * Allocate a new dirlock and add it to the list.
			 */
			dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP);
			cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL);
			dl->dl_name = name;
			dl->dl_sharecnt = 0;
			dl->dl_namelock = 0;
			dl->dl_namesize = 0;
			dl->dl_dzp = dzp;
			dl->dl_next = dzp->z_dirlocks;
			dzp->z_dirlocks = dl;
			break;
		}
		if ((flag & ZSHARED) && dl->dl_sharecnt != 0)
			break;
		cv_wait(&dl->dl_cv, &dzp->z_lock);
	}

	/*
	 * If we did not take the z_name_lock here (ZHAVELOCK: the caller
	 * already holds it), record that so the unlock path leaves it alone.
	 */
	if (flag & ZHAVELOCK)
		dl->dl_namelock = 1;

	if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) {
		/*
		 * We're the second shared reference to dl.  Make a copy of
		 * dl_name in case the first thread goes away before we do.
		 * Note that we initialize the new name before storing its
		 * pointer into dl_name, because the first thread may load
		 * dl->dl_name at any time.  He'll either see the old value,
		 * which is his, or the new shared copy; either is OK.
		 */
		dl->dl_namesize = strlen(dl->dl_name) + 1;
		name = kmem_alloc(dl->dl_namesize, KM_SLEEP);
		bcopy(dl->dl_name, name, dl->dl_namesize);
		dl->dl_name = name;
	}

	mutex_exit(&dzp->z_lock);

	/*
	 * We have a dirlock on the name.  (Note that it is the dirlock,
	 * not the dzp's z_lock, that protects the name in the zap object.)
	 * See if there's an object by this name; if so, put a hold on it.
	 */
	if (flag & ZXATTR) {
		error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zsb), &zoid,
		    sizeof (zoid));
		if (error == 0)
			error = (zoid == 0 ? SET_ERROR(ENOENT) : 0);
	} else {
#ifdef HAVE_DNLC
		if (update)
			vp = dnlc_lookup(ZTOI(dzp), name);
		if (vp == DNLC_NO_VNODE) {
			iput(vp);
			error = SET_ERROR(ENOENT);
		} else if (vp) {
			if (flag & ZNEW) {
				zfs_dirent_unlock(dl);
				iput(vp);
				return (SET_ERROR(EEXIST));
			}
			*dlpp = dl;
			*zpp = VTOZ(vp);
			return (0);
		} else {
			error = zfs_match_find(zsb, dzp, name, exact,
			    update, direntflags, realpnp, &zoid);
		}
#else
		error = zfs_match_find(zsb, dzp, name, exact,
		    update, direntflags, realpnp, &zoid);
#endif /* HAVE_DNLC */
	}
	if (error) {
		if (error != ENOENT || (flag & ZEXISTS)) {
			zfs_dirent_unlock(dl);
			return (error);
		}
	} else {
		if (flag & ZNEW) {
			zfs_dirent_unlock(dl);
			return (SET_ERROR(EEXIST));
		}
		error = zfs_zget(zsb, zoid, zpp);
		if (error) {
			zfs_dirent_unlock(dl);
			return (error);
		}
#ifdef HAVE_DNLC
		if (!(flag & ZXATTR) && update)
			dnlc_update(ZTOI(dzp), name, ZTOI(*zpp));
#endif /* HAVE_DNLC */
	}

	*dlpp = dl;

	return (0);
}
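
The flag semantics documented above imply a fixed call pattern: take the dirlock, act on the returned znode, then release both. The sketch below is illustrative only and uses just the interfaces visible in this example (zfs_dirent_lock(), zfs_dirent_unlock(), ZTOI()/iput()); the ZEXISTS flag makes the lookup fail unless the entry already exists.

/* Hedged sketch of a ZEXISTS-style lookup under a dirlock. */
static int
lookup_existing(znode_t *dzp, char *name, znode_t **zpp)
{
	zfs_dirlock_t *dl;
	int error;

	/* Fail with ENOENT unless an entry by this name already exists. */
	error = zfs_dirent_lock(&dl, dzp, name, zpp, ZEXISTS, NULL, NULL);
	if (error != 0)
		return (error);

	/* The name is now stable: no other thread can link or unlink it. */
	/* ... examine or update *zpp here ... */

	zfs_dirent_unlock(dl);
	return (0);	/* caller inherits the hold and must iput(ZTOI(*zpp)) */
}
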
Esempio n. 25
0
int
zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
{
	dmu_object_info_t doi;
	dmu_buf_t	*db;
	znode_t		*zp;
	int err;
	sa_handle_t	*hdl;

	*zpp = NULL;

again:
	ZFS_OBJ_HOLD_ENTER(zsb, obj_num);

	err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (err);
	}

	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_SA &&
	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (SET_ERROR(EINVAL));
	}

	hdl = dmu_buf_get_user(db);
	if (hdl != NULL) {
		zp = sa_get_userdata(hdl);


		/*
		 * Since "SA" does immediate eviction we
		 * should never find a sa handle that doesn't
		 * know about the znode.
		 */

		ASSERT3P(zp, !=, NULL);

		mutex_enter(&zp->z_lock);
		ASSERT3U(zp->z_id, ==, obj_num);
		if (zp->z_unlinked) {
			err = SET_ERROR(ENOENT);
		} else {
			/*
			 * If igrab() returns NULL the VFS has independently
			 * determined the inode should be evicted and has
			 * called iput_final() to start the eviction process.
			 * The SA handle is still valid but because the VFS
			 * requires that the eviction succeed we must drop
			 * our locks and references to allow the eviction to
			 * complete.  The zfs_zget() may then be retried.
			 *
			 * This unlikely case could be optimized by registering
			 * a sops->drop_inode() callback.  The callback would
			 * need to detect the active SA hold thereby informing
			 * the VFS that this inode should not be evicted.
			 */
			if (igrab(ZTOI(zp)) == NULL) {
				mutex_exit(&zp->z_lock);
				sa_buf_rele(db, NULL);
				ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
				goto again;
			}
			*zpp = zp;
			err = 0;
		}
		mutex_exit(&zp->z_lock);
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (err);
	}
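
This snippet is cut off before the slow path that constructs a new znode, but the caller-visible contract is already clear: on success *zpp refers to a held inode that the caller must eventually release, exactly as the next example does with zfs_iput_async(). A minimal hedged usage sketch:

/* Hedged usage sketch of the zfs_zget()/iput() hold-release pattern. */
static int
with_object(zfs_sb_t *zsb, uint64_t obj)
{
	znode_t *zp;
	int err;

	if ((err = zfs_zget(zsb, obj, &zp)) != 0)
		return (err);

	/* ... use zp while holding the inode reference ... */

	iput(ZTOI(zp));		/* drop the hold zfs_zget() took */
	return (0);
}
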
Esempio n. 26
0
void
zfs_rmnode(znode_t *zp)
{
	zfs_sb_t	*zsb = ZTOZSB(zp);
	objset_t	*os = zsb->z_os;
	znode_t		*xzp = NULL;
	dmu_tx_t	*tx;
	uint64_t	acl_obj;
	uint64_t	xattr_obj;
	int		error;

	ASSERT(zp->z_links == 0);
	ASSERT(atomic_read(&ZTOI(zp)->i_count) == 0);

	/*
	 * If this is an attribute directory, purge its contents.
	 */
	if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
		if (zfs_purgedir(zp) != 0) {
			/*
			 * Not enough space to delete some xattrs.
			 * Leave it in the unlinked set.
			 */
			zfs_znode_dmu_fini(zp);

			return;
		}
	}

	/*
	 * Free up all the data in the file.
	 */
	error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
	if (error) {
		/*
		 * Not enough space.  Leave the file in the unlinked set.
		 */
		zfs_znode_dmu_fini(zp);
		return;
	}

	/*
	 * If the file has extended attributes, we're going to unlink
	 * the xattr dir.
	 */
	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb),
	    &xattr_obj, sizeof (xattr_obj));
	if (error == 0 && xattr_obj) {
		error = zfs_zget(zsb, xattr_obj, &xzp);
		ASSERT(error == 0);
	}

	acl_obj = zfs_external_acl(zp);

	/*
	 * Set up the final transaction.
	 */
	tx = dmu_tx_create(os);
	dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
	dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
	if (xzp) {
		dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, TRUE, NULL);
		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
	}
	if (acl_obj)
		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);

	zfs_sa_upgrade_txholds(tx, zp);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		/*
		 * Not enough space to delete the file.  Leave it in the
		 * unlinked set, leaking it until the fs is remounted (at
		 * which point we'll call zfs_unlinked_drain() to process it).
		 */
		dmu_tx_abort(tx);
		zfs_znode_dmu_fini(zp);
		goto out;
	}

	if (xzp) {
		ASSERT(error == 0);
		mutex_enter(&xzp->z_lock);
		xzp->z_unlinked = B_TRUE;	/* mark xzp for deletion */
		xzp->z_links = 0;	/* no more links to it */
		VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zsb),
		    &xzp->z_links, sizeof (xzp->z_links), tx));
		mutex_exit(&xzp->z_lock);
		zfs_unlinked_add(xzp, tx);
	}

	/* Remove this znode from the unlinked set */
	VERIFY3U(0, ==,
	    zap_remove_int(zsb->z_os, zsb->z_unlinkedobj, zp->z_id, tx));

	zfs_znode_delete(zp, tx);

	dmu_tx_commit(tx);
out:
	if (xzp)
		zfs_iput_async(ZTOI(xzp));
}
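
The assign/abort/commit choreography in zfs_rmnode() is the general DMU transaction pattern: declare everything the change may dirty, try to assign the tx to a txg, and either abort on failure or commit after the updates. A condensed, hedged restatement using only calls that appear above:

/* Hedged restatement of the tx pattern used in zfs_rmnode() above. */
static int
free_object_tx(objset_t *os, uint64_t obj, uint64_t unlinked_obj)
{
	dmu_tx_t *tx;
	int error;

	tx = dmu_tx_create(os);
	dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);	/* declare the free */
	dmu_tx_hold_zap(tx, unlinked_obj, FALSE, NULL);	/* and the zap edit */

	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);	/* nothing dirtied yet; just give up */
		return (error);
	}

	/* ... perform the on-disk updates under this tx ... */

	dmu_tx_commit(tx);
	return (0);
}
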
Esempio n. 27
0
/*
 * This is the upward reentry point for packets arriving from the bridging
 * module and from mac_rx for links not part of a bridge.
 */
void
mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
{
	mac_impl_t		*mip = (mac_impl_t *)mh;
	mac_ring_t		*mr = (mac_ring_t *)mrh;
	mac_soft_ring_set_t 	*mac_srs;
	mblk_t			*bp = mp_chain;
	boolean_t		hw_classified = B_FALSE;

	/*
	 * If there are any promiscuous mode callbacks defined for
	 * this MAC, pass them a copy if appropriate.
	 */
	if (mip->mi_promisc_list != NULL)
		mac_promisc_dispatch(mip, mp_chain, NULL);

	if (mr != NULL) {
		/*
		 * If the SRS teardown has started, just return. The 'mr'
		 * continues to be valid until the driver unregisters the mac.
		 * Hardware classified packets will not make their way up
		 * beyond this point once the teardown has started. The driver
		 * is never passed a pointer to a flow entry or SRS or any
		 * structure that can be freed much before mac_unregister.
		 */
		mutex_enter(&mr->mr_lock);
		if ((mr->mr_state != MR_INUSE) || (mr->mr_flag &
		    (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) {
			mutex_exit(&mr->mr_lock);
			freemsgchain(mp_chain);
			return;
		}
		if (mr->mr_classify_type == MAC_HW_CLASSIFIER) {
			hw_classified = B_TRUE;
			MR_REFHOLD_LOCKED(mr);
		}
		mutex_exit(&mr->mr_lock);

		/*
		 * We check if an SRS is controlling this ring.
		 * If so, we can directly call the srs_lower_proc
		 * routine otherwise we need to go through mac_rx_classify
		 * to reach the right place.
		 */
		if (hw_classified) {
			mac_srs = mr->mr_srs;
			/*
			 * This is supposed to be the fast path.
			 * All packets received through here were steered by
			 * the hardware classifier, and share the same
			 * MAC header info.
			 */
			mac_srs->srs_rx.sr_lower_proc(mh,
			    (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE);
			MR_REFRELE(mr);
			return;
		}
		/* We'll fall through to software classification */
	} else {
		flow_entry_t *flent;
		int err;

		rw_enter(&mip->mi_rw_lock, RW_READER);
		if (mip->mi_single_active_client != NULL) {
			flent = mip->mi_single_active_client->mci_flent_list;
			FLOW_TRY_REFHOLD(flent, err);
			rw_exit(&mip->mi_rw_lock);
			if (err == 0) {
				(flent->fe_cb_fn)(flent->fe_cb_arg1,
				    flent->fe_cb_arg2, mp_chain, B_FALSE);
				FLOW_REFRELE(flent);
				return;
			}
		} else {
			rw_exit(&mip->mi_rw_lock);
		}
	}

	if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) {
		if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL)
			return;
	}

	freemsgchain(bp);
}
Esempio n. 28
0
/*
 * Link zp into dl.  Can only fail if zp has been unlinked.
 */
int
zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
{
	znode_t *dzp = dl->dl_dzp;
	zfs_sb_t *zsb = ZTOZSB(zp);
	uint64_t value;
	int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
	sa_bulk_attr_t bulk[5];
	uint64_t mtime[2], ctime[2];
	int count = 0;
	int error;

	mutex_enter(&zp->z_lock);

	if (!(flag & ZRENAMING)) {
		if (zp->z_unlinked) {	/* no new links to unlinked zp */
			ASSERT(!(flag & (ZNEW | ZEXISTS)));
			mutex_exit(&zp->z_lock);
			return (SET_ERROR(ENOENT));
		}
		zp->z_links++;
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL,
		    &zp->z_links, sizeof (zp->z_links));

	}
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL,
	    &dzp->z_id, sizeof (dzp->z_id));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
	    &zp->z_pflags, sizeof (zp->z_pflags));

	if (!(flag & ZNEW)) {
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
		    ctime, sizeof (ctime));
		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
		    ctime, B_TRUE);
	}
	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
	ASSERT(error == 0);

	mutex_exit(&zp->z_lock);

	mutex_enter(&dzp->z_lock);
	dzp->z_size++;
	dzp->z_links += zp_is_dir;
	count = 0;
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL,
	    &dzp->z_size, sizeof (dzp->z_size));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL,
	    &dzp->z_links, sizeof (dzp->z_links));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL,
	    mtime, sizeof (mtime));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
	    ctime, sizeof (ctime));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
	    &dzp->z_pflags, sizeof (dzp->z_pflags));
	zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
	error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
	ASSERT(error == 0);
	mutex_exit(&dzp->z_lock);

	value = zfs_dirent(zp, zp->z_mode);
	error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name,
	    8, 1, &value, tx);
	ASSERT(error == 0);

	return (0);
}
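
The SA_ADD_BULK_ATTR()/sa_bulk_update() pairing used twice above is the standard way to touch several system attributes in one call: build an array of attribute descriptors, then push them in a single bulk update inside the transaction. A stripped-down, hedged sketch of that pattern:

/* Hedged sketch of a bulk SA update, mirroring the pattern above. */
static void
bump_size_and_ctime(znode_t *dzp, uint64_t ctime[2], dmu_tx_t *tx)
{
	zfs_sb_t *zsb = ZTOZSB(dzp);
	sa_bulk_attr_t bulk[2];
	int count = 0;
	int error;

	mutex_enter(&dzp->z_lock);
	dzp->z_size++;
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL,
	    &dzp->z_size, sizeof (dzp->z_size));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
	    ctime, sizeof (uint64_t) * 2);	/* ctime decays to a pointer */
	error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
	ASSERT(error == 0);
	mutex_exit(&dzp->z_lock);
}
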
Esempio n. 29
0
int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_t **osp)
{
	objset_t *os;
	int i, err;

	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));

	os = kmem_zalloc(sizeof (objset_t), KM_PUSHPAGE);
	os->os_dsl_dataset = ds;
	os->os_spa = spa;
	os->os_rootbp = bp;
	if (!BP_IS_HOLE(os->os_rootbp)) {
		uint32_t aflags = ARC_WAIT;
		zbookmark_t zb;
		SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
		    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);

		if (DMU_OS_IS_L2CACHEABLE(os))
			aflags |= ARC_L2CACHE;
		if (DMU_OS_IS_L2COMPRESSIBLE(os))
			aflags |= ARC_L2COMPRESS;

		dprintf_bp(os->os_rootbp, "reading %s", "");
		err = arc_read(NULL, spa, os->os_rootbp,
		    arc_getbuf_func, &os->os_phys_buf,
		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
		if (err != 0) {
			kmem_free(os, sizeof (objset_t));
			/* convert checksum errors into IO errors */
			if (err == ECKSUM)
				err = SET_ERROR(EIO);
			return (err);
		}

		/* Increase the blocksize if we are permitted. */
		if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
		    arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
			arc_buf_t *buf = arc_buf_alloc(spa,
			    sizeof (objset_phys_t), &os->os_phys_buf,
			    ARC_BUFC_METADATA);
			bzero(buf->b_data, sizeof (objset_phys_t));
			bcopy(os->os_phys_buf->b_data, buf->b_data,
			    arc_buf_size(os->os_phys_buf));
			(void) arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf);
			os->os_phys_buf = buf;
		}

		os->os_phys = os->os_phys_buf->b_data;
		os->os_flags = os->os_phys->os_flags;
	} else {
		int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
		    sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
		os->os_phys_buf = arc_buf_alloc(spa, size,
		    &os->os_phys_buf, ARC_BUFC_METADATA);
		os->os_phys = os->os_phys_buf->b_data;
		bzero(os->os_phys, size);
	}

	/*
	 * Note: the changed_cb will be called once before the register
	 * func returns, thus changing the checksum/compression from the
	 * default (fletcher2/off).  Snapshots don't need to know about
	 * checksum/compression/copies.
	 */
	if (ds) {
		err = dsl_prop_register(ds,
		    zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
		    primary_cache_changed_cb, os);
		if (err == 0) {
			err = dsl_prop_register(ds,
			    zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
			    secondary_cache_changed_cb, os);
		}
		if (!dsl_dataset_is_snapshot(ds)) {
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_CHECKSUM),
				    checksum_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
				    compression_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_COPIES),
				    copies_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_DEDUP),
				    dedup_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_LOGBIAS),
				    logbias_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_SYNC),
				    sync_changed_cb, os);
			}
		}
		if (err != 0) {
			VERIFY(arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf));
			kmem_free(os, sizeof (objset_t));
			return (err);
		}
	} else if (ds == NULL) {
		/* It's the meta-objset. */
		os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
		os->os_compress = ZIO_COMPRESS_LZJB;
		os->os_copies = spa_max_replication(spa);
		os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
		os->os_dedup_verify = 0;
		os->os_logbias = 0;
		os->os_sync = 0;
		os->os_primary_cache = ZFS_CACHE_ALL;
		os->os_secondary_cache = ZFS_CACHE_ALL;
	}

	if (ds == NULL || !dsl_dataset_is_snapshot(ds))
		os->os_zil_header = os->os_phys->os_zil_header;
	os->os_zil = zil_alloc(os, &os->os_zil_header);

	for (i = 0; i < TXG_SIZE; i++) {
		list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
		list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
	}
	list_create(&os->os_dnodes, sizeof (dnode_t),
	    offsetof(dnode_t, dn_link));
	list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));

	mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);

	DMU_META_DNODE(os) = dnode_special_open(os,
	    &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT,
	    &os->os_meta_dnode);
	if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
		DMU_USERUSED_DNODE(os) = dnode_special_open(os,
		    &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT,
		    &os->os_userused_dnode);
		DMU_GROUPUSED_DNODE(os) = dnode_special_open(os,
		    &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT,
		    &os->os_groupused_dnode);
	}

	/*
	 * We should be the only thread trying to do this because we
	 * have ds_opening_lock
	 */
	if (ds) {
		mutex_enter(&ds->ds_lock);
		ASSERT(ds->ds_objset == NULL);
		ds->ds_objset = os;
		mutex_exit(&ds->ds_lock);
	}

	*osp = os;
	return (0);
}
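
Each dsl_prop_register() call above arranges for the callback to run once immediately (seeding the in-core value, as the comment notes) and again whenever the property changes. The sketch below is hedged: the (void *arg, uint64_t newval) prototype is inferred from how the *_changed_cb functions are registered here, the callback name is illustrative, and a real callback would validate or translate newval before caching it.

/* Hedged sketch of a property-changed callback; name is illustrative. */
static void
my_checksum_changed_cb(void *arg, uint64_t newval)
{
	objset_t *os = arg;

	/* Cache the new value on the objset (real code validates it first). */
	os->os_checksum = newval;
}

Registration would then mirror the calls above, e.g. dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_CHECKSUM), my_checksum_changed_cb, os).
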
Esempio n. 30
0
/*
 * launch slave cpus into kernel text, pause them,
 * and restore the original prom pages
 */
void
i_cpr_mp_setup(void)
{
	extern void restart_other_cpu(int);
	cpu_t *cp;

	uint64_t kctx = kcontextreg;

	/*
	 * Do not allow setting page size codes in MMU primary context
	 * register while using cif wrapper. This is needed to work
	 * around OBP incorrect handling of this MMU register.
	 */
	kcontextreg = 0;

	/*
	 * reset cpu_ready_set so x_calls work properly
	 */
	CPUSET_ZERO(cpu_ready_set);
	CPUSET_ADD(cpu_ready_set, getprocessorid());

	/*
	 * setup cif to use the cookie from the new/tmp prom
	 * and setup tmp handling for calling prom services.
	 */
	i_cpr_cif_setup(CIF_SPLICE);

	/*
	 * at this point, only the nucleus and a few cpr pages are
	 * mapped in.  once we switch to the kernel trap table,
	 * we can access the rest of kernel space.
	 */
	prom_set_traptable(&trap_table);

	if (ncpus > 1) {
		sfmmu_init_tsbs();

		mutex_enter(&cpu_lock);
		/*
		 * None of the slave cpus are ready at this time,
		 * yet the cpu structures have various cpu_flags set;
		 * clear cpu_flags and mutex_ready.
		 * Since we are coming up from a CPU suspend, the slave cpus
		 * are frozen.
		 */
		for (cp = CPU->cpu_next; cp != CPU; cp = cp->cpu_next) {
			cp->cpu_flags = CPU_FROZEN;
			cp->cpu_m.mutex_ready = 0;
		}

		for (cp = CPU->cpu_next; cp != CPU; cp = cp->cpu_next)
			restart_other_cpu(cp->cpu_id);

		pause_cpus(NULL, NULL);
		mutex_exit(&cpu_lock);

		i_cpr_xcall(i_cpr_clear_entries);
	} else
		i_cpr_clear_entries(0, 0);

	/*
	 * now unlink the cif wrapper;  WARNING: do not call any
	 * prom_xxx() routines until after prom pages are restored.
	 */
	i_cpr_cif_setup(CIF_UNLINK);

	(void) i_cpr_prom_pages(CPR_PROM_RESTORE);

	/* allow setting page size codes in MMU primary context register */
	kcontextreg = kctx;
}