void linux32_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) { struct lwp *l = curlwp; struct proc *p = l->l_proc; struct trapframe *tf; struct linux32_rt_sigframe *fp, frame; int onstack, error; linux32_siginfo_t *lsi; int sig = ksi->ksi_signo; sig_t catcher = SIGACTION(p, sig).sa_handler; struct sigaltstack *sas = &l->l_sigstk; tf = l->l_md.md_regs; /* Do we need to jump onto the signal stack? */ onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; /* Allocate space for the signal handler context. */ if (onstack) fp = (struct linux32_rt_sigframe *)((char *)sas->ss_sp + sas->ss_size); else fp = (struct linux32_rt_sigframe *)tf->tf_rsp; fp--; /* Build stack frame for signal trampoline. */ NETBSD32PTR32(frame.sf_handler, catcher); frame.sf_sig = native_to_linux32_signo[sig]; NETBSD32PTR32(frame.sf_sip, &fp->sf_si); NETBSD32PTR32(frame.sf_ucp, &fp->sf_uc); DPRINTF(("rt: onstack = %d, fp = %p sig = %d rip = 0x%lx\n", onstack, fp, sig, tf->tf_rip)); lsi = &frame.sf_si; (void)memset(lsi, 0, sizeof(frame.sf_si)); lsi->lsi_errno = native_to_linux32_errno[ksi->ksi_errno]; lsi->lsi_code = native_to_linux_si_code(ksi->ksi_code); lsi->lsi_signo = frame.sf_sig; switch (lsi->lsi_signo) { case LINUX32_SIGILL: case LINUX32_SIGFPE: case LINUX32_SIGSEGV: case LINUX32_SIGBUS: case LINUX32_SIGTRAP: NETBSD32PTR32(lsi->lsi_addr, ksi->ksi_addr); break; case LINUX32_SIGCHLD: lsi->lsi_uid = ksi->ksi_uid; lsi->lsi_pid = ksi->ksi_pid; lsi->lsi_utime = ksi->ksi_utime; lsi->lsi_stime = ksi->ksi_stime; lsi->lsi_status = native_to_linux_si_status(ksi->ksi_code, ksi->ksi_status); break; case LINUX32_SIGIO: lsi->lsi_band = ksi->ksi_band; lsi->lsi_fd = ksi->ksi_fd; break; default: lsi->lsi_uid = ksi->ksi_uid; lsi->lsi_pid = ksi->ksi_pid; if (lsi->lsi_signo == LINUX32_SIGALRM || lsi->lsi_signo >= LINUX32_SIGRTMIN) NETBSD32PTR32(lsi->lsi_value.sival_ptr, ksi->ksi_value.sival_ptr); break; } /* Save register context. */ linux32_save_ucontext(l, tf, mask, sas, &frame.sf_uc); sendsig_reset(l, sig); mutex_exit(p->p_lock); error = copyout(&frame, fp, sizeof(frame)); mutex_enter(p->p_lock); if (error != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ sigexit(l, SIGILL); /* NOTREACHED */ } /* * Build context to run handler in. */ tf->tf_fs = GSEL(GUDATA32_SEL, SEL_UPL) & 0xffffffff; tf->tf_es = GSEL(GUDATA32_SEL, SEL_UPL) & 0xffffffff; tf->tf_ds = GSEL(GUDATA32_SEL, SEL_UPL) & 0xffffffff; tf->tf_rip = (((long)p->p_sigctx.ps_sigcode) + (linux32_rt_sigcode - linux32_sigcode)) & 0xffffffff; tf->tf_cs = GSEL(GUCODE32_SEL, SEL_UPL) & 0xffffffff; tf->tf_rflags &= ~PSL_CLEARSIG & 0xffffffff; tf->tf_rsp = (long)fp & 0xffffffff; tf->tf_ss = GSEL(GUDATA32_SEL, SEL_UPL) & 0xffffffff; /* Remember that we're now on the signal stack. */ if (onstack) sas->ss_flags |= SS_ONSTACK; return; }
/* * Main routine for the callbacks notifications thread */ static void i_mac_notify_thread(void *arg) { mac_impl_t *mip = arg; callb_cpr_t cprinfo; mac_cb_t *mcb; mac_cb_info_t *mcbi; mac_notify_cb_t *mncb; mcbi = &mip->mi_notify_cb_info; CALLB_CPR_INIT(&cprinfo, mcbi->mcbi_lockp, callb_generic_cpr, "i_mac_notify_thread"); mutex_enter(mcbi->mcbi_lockp); for (;;) { uint32_t bits; uint32_t type; bits = mip->mi_notify_bits; if (bits == 0) { CALLB_CPR_SAFE_BEGIN(&cprinfo); cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp); CALLB_CPR_SAFE_END(&cprinfo, mcbi->mcbi_lockp); continue; } mip->mi_notify_bits = 0; if ((bits & (1 << MAC_NNOTE)) != 0) { /* request to quit */ ASSERT(mip->mi_state_flags & MIS_DISABLED); break; } mutex_exit(mcbi->mcbi_lockp); /* * Log link changes on the actual link, but then do reports on * synthetic state (if part of a bridge). */ if ((bits & (1 << MAC_NOTE_LOWLINK)) != 0) { link_state_t newstate; mac_handle_t mh; i_mac_log_link_state(mip); newstate = mip->mi_lowlinkstate; if (mip->mi_bridge_link != NULL) { mutex_enter(&mip->mi_bridge_lock); if ((mh = mip->mi_bridge_link) != NULL) { newstate = mac_bridge_ls_cb(mh, newstate); } mutex_exit(&mip->mi_bridge_lock); } if (newstate != mip->mi_linkstate) { mip->mi_linkstate = newstate; bits |= 1 << MAC_NOTE_LINK; } } /* * Do notification callbacks for each notification type. */ for (type = 0; type < MAC_NNOTE; type++) { if ((bits & (1 << type)) == 0) { continue; } if (mac_notify_cb_list[type] != NULL) (*mac_notify_cb_list[type])(mip); /* * Walk the list of notifications. */ MAC_CALLBACK_WALKER_INC(&mip->mi_notify_cb_info); for (mcb = mip->mi_notify_cb_list; mcb != NULL; mcb = mcb->mcb_nextp) { mncb = (mac_notify_cb_t *)mcb->mcb_objp; mncb->mncb_fn(mncb->mncb_arg, type); } MAC_CALLBACK_WALKER_DCR(&mip->mi_notify_cb_info, &mip->mi_notify_cb_list); } mutex_enter(mcbi->mcbi_lockp); } mip->mi_state_flags |= MIS_NOTIFY_DONE; cv_broadcast(&mcbi->mcbi_cv); /* CALLB_CPR_EXIT drops the lock */ CALLB_CPR_EXIT(&cprinfo); thread_exit(); }
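/*
 * Illustrative sketch (not part of the original source): the producer side
 * of the notification protocol consumed by i_mac_notify_thread() above.
 * A notifier sets the per-type bit in mi_notify_bits under mcbi_lockp and
 * wakes the thread through mcbi_cv.  The helper name i_mac_notify_post()
 * and the choice of cv_broadcast() are assumptions for illustration, not
 * the actual mac module implementation.
 */
static void
i_mac_notify_post(mac_impl_t *mip, uint_t type)
{
        mac_cb_info_t *mcbi = &mip->mi_notify_cb_info;

        mutex_enter(mcbi->mcbi_lockp);
        mip->mi_notify_bits |= (1 << type);
        cv_broadcast(&mcbi->mcbi_cv);
        mutex_exit(mcbi->mcbi_lockp);
}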
int signotify(int cmd, siginfo_t *siginfo, signotify_id_t *sn_id) { k_siginfo_t info; signotify_id_t id; proc_t *p; proc_t *cp = curproc; signotifyq_t *snqp; struct cred *cr; sigqueue_t *sqp; sigqhdr_t *sqh; u_longlong_t sid; model_t datamodel = get_udatamodel(); if (copyin(sn_id, &id, sizeof (signotify_id_t))) return (set_errno(EFAULT)); if (id.sn_index >= _SIGNOTIFY_MAX || id.sn_index < 0) return (set_errno(EINVAL)); switch (cmd) { case SN_PROC: /* get snid for the given user address of signotifyid_t */ sid = get_sigid(cp, (caddr_t)sn_id); if (id.sn_pid > 0) { mutex_enter(&pidlock); if ((p = prfind(id.sn_pid)) != NULL) { mutex_enter(&p->p_lock); if (p->p_signhdr != NULL) { snqp = SIGN_PTR(p, id.sn_index); if (snqp->sn_snid == sid) { mutex_exit(&p->p_lock); mutex_exit(&pidlock); return (set_errno(EBUSY)); } } mutex_exit(&p->p_lock); } mutex_exit(&pidlock); } if (copyin_siginfo(datamodel, siginfo, &info)) return (set_errno(EFAULT)); /* The si_code value must indicate the signal will be queued */ if (!sigwillqueue(info.si_signo, info.si_code)) return (set_errno(EINVAL)); if (cp->p_signhdr == NULL) { /* Allocate signotify pool first time */ sqh = sigqhdralloc(sizeof (signotifyq_t), _SIGNOTIFY_MAX); mutex_enter(&cp->p_lock); if (cp->p_signhdr == NULL) { /* hang the pool head on proc */ cp->p_signhdr = sqh; } else { /* another lwp allocated the pool, free ours */ sigqhdrfree(sqh); } } else { mutex_enter(&cp->p_lock); } sqp = sigqalloc(cp->p_signhdr); if (sqp == NULL) { mutex_exit(&cp->p_lock); return (set_errno(EAGAIN)); } cr = CRED(); sqp->sq_info = info; sqp->sq_info.si_pid = cp->p_pid; sqp->sq_info.si_ctid = PRCTID(cp); sqp->sq_info.si_zoneid = getzoneid(); sqp->sq_info.si_uid = crgetruid(cr); /* fill the signotifyq_t fields */ ((signotifyq_t *)sqp)->sn_snid = sid; mutex_exit(&cp->p_lock); /* complete the signotify_id_t fields */ id.sn_index = (signotifyq_t *)sqp - SIGN_PTR(cp, 0); id.sn_pid = cp->p_pid; break; case SN_CANCEL: case SN_SEND: sid = get_sigid(cp, (caddr_t)sn_id); mutex_enter(&pidlock); if ((id.sn_pid <= 0) || ((p = prfind(id.sn_pid)) == NULL)) { mutex_exit(&pidlock); return (set_errno(EINVAL)); } mutex_enter(&p->p_lock); mutex_exit(&pidlock); if (p->p_signhdr == NULL) { mutex_exit(&p->p_lock); return (set_errno(EINVAL)); } snqp = SIGN_PTR(p, id.sn_index); if (snqp->sn_snid == 0) { mutex_exit(&p->p_lock); return (set_errno(EINVAL)); } if (snqp->sn_snid != sid) { mutex_exit(&p->p_lock); return (set_errno(EINVAL)); } snqp->sn_snid = 0; /* cmd == SN_CANCEL or signo == 0 (SIGEV_NONE) */ if (((sigqueue_t *)snqp)->sq_info.si_signo <= 0) cmd = SN_CANCEL; sigqsend(cmd, p, 0, (sigqueue_t *)snqp); mutex_exit(&p->p_lock); id.sn_pid = 0; id.sn_index = 0; break; default : return (set_errno(EINVAL)); } if (copyout(&id, sn_id, sizeof (signotify_id_t))) return (set_errno(EFAULT)); return (0); }
/* * smb2sr_work * * This function processes each SMB command in the current request * (which may be a compound request) building a reply containing * SMB reply messages, one-to-one with the SMB commands. Some SMB * commands (change notify, blocking pipe read) may require both an * "interim response" and a later "async response" at completion. * In such cases, we'll encode the interim response in the reply * compound we're building, and put the (now async) command on a * list of commands that need further processing. After we've * finished processing the commands in this compound and building * the compound reply, we'll send the compound reply, and finally * process the list of async commands. * * As we work our way through the compound request and reply, * we need to keep track of the bounds of the current request * and reply. For the request, this uses an MBC_SHADOW_CHAIN * that begins at smb2_cmd_hdr. The reply is appended to the * sr->reply chain starting at smb2_reply_hdr. * * This function must always free the smb request. */ void smb2sr_work(struct smb_request *sr) { smb_session_t *session; uint32_t msg_len; int rc; boolean_t disconnect = B_FALSE; session = sr->session; ASSERT(sr->tid_tree == 0); ASSERT(sr->uid_user == 0); ASSERT(sr->fid_ofile == 0); sr->smb_fid = (uint16_t)-1; /* temporary until we identify a user */ sr->user_cr = zone_kcred(); mutex_enter(&sr->sr_mutex); switch (sr->sr_state) { case SMB_REQ_STATE_SUBMITTED: case SMB_REQ_STATE_CLEANED_UP: sr->sr_state = SMB_REQ_STATE_ACTIVE; break; default: ASSERT(0); /* FALLTHROUGH */ case SMB_REQ_STATE_CANCELED: goto complete_unlock_free; } mutex_exit(&sr->sr_mutex); cmd_start: /* * Reserve space for the reply header, and save the offset. * The reply header will be overwritten later. */ sr->smb2_reply_hdr = sr->reply.chain_offset; (void) smb_mbc_encodef(&sr->reply, "#.", SMB2_HDR_SIZE); /* * Decode the request header * * Most problems with decoding will result in the error * STATUS_INVALID_PARAMETER. If the decoding problem * prevents continuing, we'll close the connection. * [MS-SMB2] 3.3.5.2.6 Handling Incorrectly Formatted... */ sr->smb2_status = 0; sr->smb2_cmd_hdr = sr->command.chain_offset; if ((rc = smb2_decode_header(sr)) != 0) { cmn_err(CE_WARN, "clnt %s bad SMB2 header", session->ip_addr_str); disconnect = B_TRUE; goto cleanup; } /* * Figure out the length of data following the SMB2 header. * It ends at either the next SMB2 header if there is one * (smb2_next_command != 0) or at the end of the message. */ if (sr->smb2_next_command != 0) { /* [MS-SMB2] says this is 8-byte aligned */ msg_len = sr->smb2_next_command; if ((msg_len & 7) != 0 || (msg_len < SMB2_HDR_SIZE) || ((sr->smb2_cmd_hdr + msg_len) > sr->command.max_bytes)) { cmn_err(CE_WARN, "clnt %s bad SMB2 next cmd", session->ip_addr_str); disconnect = B_TRUE; goto cleanup; } } else { msg_len = sr->command.max_bytes - sr->smb2_cmd_hdr; } /* * Setup a shadow chain for this SMB2 command, starting * with the header and ending at either the next command * or the end of the message. Note that we've already * decoded the header, so chain_offset is now positioned * at the end of the header. The signing check needs the * entire SMB2 command, so we'll shadow starting at the * smb2_cmd_hdr offset. After the signing check, we'll * move chain_offset up to the end of the header. */ (void) MBC_SHADOW_CHAIN(&sr->smb_data, &sr->command, sr->smb2_cmd_hdr, msg_len); /* * Verify SMB signature if signing is enabled and active now. 
* [MS-SMB2] 3.3.5.2.4 Verifying the Signature */ if ((sr->smb2_hdr_flags & SMB2_FLAGS_SIGNED) != 0) { rc = smb2_sign_check_request(sr); if (rc != 0) { DTRACE_PROBE1(smb2__sign__check, smb_request_t, sr); if (session->signing.flags & SMB_SIGNING_CHECK) { smb2sr_put_error(sr, NT_STATUS_ACCESS_DENIED); goto cmd_finish; } } } /* * Now that the signing check is done with smb_data, * advance past the SMB2 header we decoded above. * This leaves sr->smb_data correctly positioned * for command-specific decoding in the dispatch * function called next. */ sr->smb_data.chain_offset = sr->smb2_cmd_hdr + SMB2_HDR_SIZE; /* * Default credit response. Command handler may modify. */ sr->smb2_credit_response = sr->smb2_credit_request; /* * Common dispatch (for sync & async) */ rc = smb2sr_dispatch(sr, NULL); switch (rc) { case SDRC_SUCCESS: break; default: /* * SMB2 does not use the other dispatch return codes. * If we see something else, log an event so we'll * know something is returning bogus status codes. * If you see these in the log, use dtrace to find * the code returning something else. */ #ifdef DEBUG cmn_err(CE_NOTE, "smb2sr_dispatch -> 0x%x", rc); #endif /* FALLTHROUGH */ case SDRC_ERROR: if (sr->smb2_status == 0) sr->smb2_status = NT_STATUS_INTERNAL_ERROR; break; case SDRC_DROP_VC: disconnect = B_TRUE; goto cleanup; } /* * If there's a next command, figure out where it starts, * and fill in the next command offset for the reply. * Note: We sanity checked smb2_next_command above * (the offset to the next command). Similarly set * smb2_next_reply as the offset to the next reply. */ cmd_finish: if (sr->smb2_next_command != 0) { sr->command.chain_offset = sr->smb2_cmd_hdr + sr->smb2_next_command; sr->smb2_next_reply = sr->reply.chain_offset - sr->smb2_reply_hdr; } else { sr->smb2_next_reply = 0; } /* * Overwrite the SMB2 header for the response of * this command (possibly part of a compound). */ sr->smb2_hdr_flags |= SMB2_FLAGS_SERVER_TO_REDIR; (void) smb2_encode_header(sr, B_TRUE); if (sr->smb2_hdr_flags & SMB2_FLAGS_SIGNED) smb2_sign_reply(sr); if (sr->smb2_next_command != 0) goto cmd_start; /* * We've done all the commands in this compound. * Send it out. */ smb2_send_reply(sr); /* * If any of the requests "went async", process those now. */ if (sr->sr_async_req != NULL) { smb2sr_do_async(sr); } cleanup: if (disconnect) { smb_rwx_rwenter(&session->s_lock, RW_WRITER); switch (session->s_state) { case SMB_SESSION_STATE_DISCONNECTED: case SMB_SESSION_STATE_TERMINATED: break; default: smb_soshutdown(session->sock); session->s_state = SMB_SESSION_STATE_DISCONNECTED; break; } smb_rwx_rwexit(&session->s_lock); } mutex_enter(&sr->sr_mutex); complete_unlock_free: sr->sr_state = SMB_REQ_STATE_COMPLETED; mutex_exit(&sr->sr_mutex); smb_request_free(sr); }
/* * Construct a znode+inode and initialize. * * This does not do a call to dmu_set_user() that is * up to the caller to do, in case you don't want to * return the znode */ static znode_t * zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz, dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl, struct inode *dip) { znode_t *zp; struct inode *ip; uint64_t mode; uint64_t parent; sa_bulk_attr_t bulk[9]; int count = 0; ASSERT(zsb != NULL); ip = new_inode(zsb->z_sb); if (ip == NULL) return (NULL); zp = ITOZ(ip); ASSERT(zp->z_dirlocks == NULL); ASSERT3P(zp->z_acl_cached, ==, NULL); ASSERT3P(zp->z_xattr_cached, ==, NULL); ASSERT3P(zp->z_xattr_parent, ==, NULL); zp->z_moved = 0; zp->z_sa_hdl = NULL; zp->z_unlinked = 0; zp->z_atime_dirty = 0; zp->z_mapcnt = 0; zp->z_id = db->db_object; zp->z_blksz = blksz; zp->z_seq = 0x7A4653; zp->z_sync_cnt = 0; zp->z_is_zvol = B_FALSE; zp->z_is_mapped = B_FALSE; zp->z_is_ctldir = B_FALSE; zp->z_is_stale = B_FALSE; zfs_znode_sa_init(zsb, zp, db, obj_type, hdl); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &mode, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zsb), NULL, &zp->z_gen, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, &zp->z_links, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL, &parent, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL, &zp->z_atime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, &zp->z_uid, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL, &zp->z_gid, 8); if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0) { if (hdl == NULL) sa_handle_destroy(zp->z_sa_hdl); goto error; } zp->z_mode = mode; /* * xattr znodes hold a reference on their unique parent */ if (dip && zp->z_pflags & ZFS_XATTR) { igrab(dip); zp->z_xattr_parent = ITOZ(dip); } ip->i_ino = obj; zfs_inode_update(zp); zfs_inode_set_ops(zsb, ip); /* * The only way insert_inode_locked() can fail is if the ip->i_ino * number is already hashed for this super block. This can never * happen because the inode numbers map 1:1 with the object numbers. * * The one exception is rolling back a mounted file system, but in * this case all the active inode are unhashed during the rollback. */ VERIFY3S(insert_inode_locked(ip), ==, 0); mutex_enter(&zsb->z_znodes_lock); list_insert_tail(&zsb->z_all_znodes, zp); zsb->z_nr_znodes++; membar_producer(); mutex_exit(&zsb->z_znodes_lock); unlock_new_inode(ip); return (zp); error: unlock_new_inode(ip); iput(ip); return (NULL); }
/*
 * With the addition of reader-writer lock semantics to page_lock_es,
 * callers wanting an exclusive (writer) lock may prevent shared-lock
 * (reader) starvation by setting the es parameter to SE_EXCL_WANTED.
 * In this case, when an exclusive lock cannot be acquired, p_selock's
 * SE_EWANTED bit is set.  Shared-lock (reader) requests are also denied
 * if the page is slated for retirement.
 *
 * The se and es parameters determine if the lock should be granted
 * based on the following decision table:
 *
 *   Lock wanted   es flags       p_selock/SE_EWANTED   Action
 *   -----------   ------------   -------------------   ----------------------------
 *   SE_EXCL       any [1][2]     unlocked/any          grant lock, clear SE_EWANTED
 *   SE_EXCL       SE_EWANTED     any lock/any          deny, set SE_EWANTED
 *   SE_EXCL       none           any lock/any          deny
 *   SE_SHARED     n/a [2]        shared/0              grant
 *   SE_SHARED     n/a [2]        unlocked/0            grant
 *   SE_SHARED     n/a            shared/1              deny
 *   SE_SHARED     n/a            unlocked/1            deny
 *   SE_SHARED     n/a            excl/any              deny
 *
 * Notes:
 * [1] The code grants an exclusive lock to the caller and clears the bit
 *     SE_EWANTED whenever p_selock is unlocked, regardless of the SE_EWANTED
 *     bit's value.  This was deemed acceptable as we are not concerned about
 *     exclusive-lock starvation.  If this ever becomes an issue, a priority
 *     or FIFO mechanism should also be implemented.  Meantime, the thread
 *     that set SE_EWANTED should be prepared to catch this condition and
 *     reset it.
 *
 * [2] Retired pages may not be locked at any time, regardless of the
 *     disposition of se, unless the es parameter has the SE_RETIRED flag set.
 *
 * Notes on values of "es":
 *
 *   es & 1: page_lookup_create will attempt page relocation
 *   es & SE_EXCL_WANTED: caller wants SE_EWANTED set (e.g. the delete
 *       memory thread); this prevents reader-starvation of waiting
 *       writer thread(s) by giving priority to writers over readers.
 *   es & SE_RETIRED: caller wants to lock pages even if they are
 *       retired.  Default is to deny the lock if the page is retired.
 *
 * And yes, we know, the semantics of this function are too complicated.
 * It's on the list to be cleaned up.
 */
int
page_lock_es(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim, int es)
{
        int             retval;
        kmutex_t        *pse = PAGE_SE_MUTEX(pp);
        int             upgraded;
        int             reclaim_it;

        ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);

        VM_STAT_ADD(page_lock_count);

        upgraded = 0;
        reclaim_it = 0;

        mutex_enter(pse);

        ASSERT(((es & SE_EXCL_WANTED) == 0) ||
            ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));

        if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
                mutex_exit(pse);
                VM_STAT_ADD(page_lock_retired);
                return (0);
        }

        if (se == SE_SHARED && es == 1 && pp->p_selock == 0) {
                se = SE_EXCL;
        }

        if ((reclaim == P_RECLAIM) && (PP_ISFREE(pp))) {

                reclaim_it = 1;
                if (se == SE_SHARED) {
                        /*
                         * This is an interesting situation.
                         *
                         * Remember that p_free can only change if
                         * p_selock < 0.
                         * p_free does not depend on our holding `pse'.
                         * And, since we hold `pse', p_selock can not change.
                         * So, if p_free changes on us, the page is already
                         * exclusively held, and we would fail to get p_selock
                         * regardless.
                         *
                         * We want to avoid getting the share
                         * lock on a free page that needs to be reclaimed.
                         * It is possible that some other thread has the share
                         * lock and has left the free page on the cache list.
                         * pvn_vplist_dirty() does this for brief periods.
                         * If the se_share is currently SE_EXCL, we will fail
                         * to acquire p_selock anyway.  Blocking is the
                         * right thing to do.
                         * If we need to reclaim this page, we must get
                         * exclusive access to it, force the upgrade now.
                         * Again, we will fail to acquire p_selock if the
                         * page is not free and block.
                         */
                        upgraded = 1;
                        se = SE_EXCL;
                        VM_STAT_ADD(page_lock_upgrade);
                }
        }

        if (se == SE_EXCL) {
                if (!(es & SE_EXCL_WANTED) && (pp->p_selock & SE_EWANTED)) {
                        /*
                         * if the caller wants a writer lock (but did not
                         * specify exclusive access), and there is a pending
                         * writer that wants exclusive access, return failure
                         */
                        retval = 0;
                } else if ((pp->p_selock & ~SE_EWANTED) == 0) {
                        /* no reader/writer lock held */
                        THREAD_KPRI_REQUEST();
                        /* this clears our setting of the SE_EWANTED bit */
                        pp->p_selock = SE_WRITER;
                        retval = 1;
                } else {
                        /* page is locked */
                        if (es & SE_EXCL_WANTED) {
                                /* set the SE_EWANTED bit */
                                pp->p_selock |= SE_EWANTED;
                        }
                        retval = 0;
                }
        } else {
                retval = 0;
                if (pp->p_selock >= 0) {
                        if ((pp->p_selock & SE_EWANTED) == 0) {
                                pp->p_selock += SE_READER;
                                retval = 1;
                        }
                }
        }

        if (retval == 0) {
                if ((pp->p_selock & ~SE_EWANTED) == SE_DELETED) {
                        VM_STAT_ADD(page_lock_deleted);
                        mutex_exit(pse);
                        return (retval);
                }

#ifdef VM_STATS
                VM_STAT_ADD(page_lock_miss);
                if (upgraded) {
                        VM_STAT_ADD(page_lock_upgrade_failed);
                }
#endif
                if (lock) {
                        VM_STAT_ADD(page_lock_miss_lock);
                        mutex_exit(lock);
                }

                /*
                 * Now, wait for the page to be unlocked and
                 * release the lock protecting p_cv and p_selock.
                 */
                cv_wait(&pp->p_cv, pse);
                mutex_exit(pse);

                /*
                 * The page identity may have changed while we were
                 * blocked.  If we are willing to depend on "pp"
                 * still pointing to a valid page structure (i.e.,
                 * assuming page structures are not dynamically allocated
                 * or freed), we could try to lock the page if its
                 * identity hasn't changed.
                 *
                 * This needs to be measured, since we come back from
                 * cv_wait holding pse (the expensive part of this
                 * operation) we might as well try the cheap part.
                 * Though we would also have to confirm that dropping
                 * `lock' did not cause any grief to the callers.
                 */
                if (lock) {
                        mutex_enter(lock);
                }
        } else {
                /*
                 * We have the page lock.
                 * If we needed to reclaim the page, and the page
                 * needed reclaiming (ie, it was free), then we
                 * have the page exclusively locked.  We may need
                 * to downgrade the page.
                 */
                ASSERT((upgraded) ?
                    ((PP_ISFREE(pp)) && PAGE_EXCL(pp)) : 1);
                mutex_exit(pse);

                /*
                 * We now hold this page's lock, either shared or
                 * exclusive.  This will prevent its identity from changing.
                 * The page, however, may or may not be free.  If the caller
                 * requested, and it is free, go reclaim it from the
                 * free list.  If the page can't be reclaimed, return failure
                 * so that the caller can start all over again.
                 *
                 * NOTE: page_reclaim() releases the page lock (p_selock)
                 * if it can't be reclaimed.
                 */
                if (reclaim_it) {
                        if (!page_reclaim(pp, lock)) {
                                VM_STAT_ADD(page_lock_bad_reclaim);
                                retval = 0;
                        } else {
                                VM_STAT_ADD(page_lock_reclaim);
                                if (upgraded) {
                                        page_downgrade(pp);
                                }
                        }
                }
        }
        return (retval);
}
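/*
 * Illustrative caller sketch (not part of the original source): per the
 * decision table above, a thread that must eventually win the writer lock
 * passes SE_EXCL_WANTED in es so that new readers are turned away while it
 * waits.  The retry policy, the P_NO_RECLAIM argument and the use of
 * page_unlock() to release the lock are assumptions for illustration.
 */
static void
example_exclusive_lock(page_t *pp)
{
        /*
         * page_lock_es() blocks in cv_wait() internally on failure and
         * then returns 0, so simply retrying is enough here.  On success
         * the grant itself clears SE_EWANTED.
         */
        while (page_lock_es(pp, SE_EXCL, NULL, P_NO_RECLAIM,
            SE_EXCL_WANTED) == 0)
                continue;

        /* ... operate on the exclusively locked page ... */

        page_unlock(pp);
}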
/*
 * Find, take and return a mutex held by hat_page_demote().
 * Called by page_demote_vp_pages() before the hat_page_demote() call and by
 * routines that want to block hat_page_demote() but can't do it
 * via locking all constituent pages.
 *
 * Return NULL if p_szc is 0.
 *
 * It should only be used for pages that can be demoted by hat_page_demote(),
 * i.e. non-swapfs file system pages.  The logic here is lifted from
 * sfmmu_mlspl_enter() except there's no need to worry about p_szc increase
 * since the page is locked and not free.
 *
 * Hash of the root page is used to find the lock.
 * To find the root in the presence of hat_page_demote() changing the location
 * of the root, this routine relies on the fact that hat_page_demote() changes
 * the root last.
 *
 * If NULL is returned, pp's p_szc is guaranteed to be 0.  If non-NULL is
 * returned, pp's p_szc may be any value.
 */
kmutex_t *
page_szc_lock(page_t *pp)
{
        kmutex_t        *mtx;
        page_t          *rootpp;
        uint_t          szc;
        uint_t          rszc;
        uint_t          pszc = pp->p_szc;

        ASSERT(pp != NULL);
        ASSERT(PAGE_LOCKED(pp));
        ASSERT(!PP_ISFREE(pp));
        ASSERT(pp->p_vnode != NULL);
        ASSERT(!IS_SWAPFSVP(pp->p_vnode));
        ASSERT(!PP_ISKAS(pp));

again:
        if (pszc == 0) {
                VM_STAT_ADD(pszclck_stat[0]);
                return (NULL);
        }

        /* The lock lives in the root page */
        rootpp = PP_GROUPLEADER(pp, pszc);
        mtx = PAGE_SZC_MUTEX(rootpp);
        mutex_enter(mtx);

        /*
         * Since p_szc can only decrease if pp == rootpp, rootpp will always
         * be the same, i.e. we have the right root regardless of
         * rootpp->p_szc.  If the location of pp's root didn't change after
         * we took the lock, we have the right root; return the mutex hashed
         * off it.
         */
        if (pp == rootpp || (rszc = rootpp->p_szc) == pszc) {
                VM_STAT_ADD(pszclck_stat[1]);
                return (mtx);
        }

        /*
         * Root location changed because the page got demoted.
         * Locate the new root.
         */
        if (rszc < pszc) {
                szc = pp->p_szc;
                ASSERT(szc < pszc);
                mutex_exit(mtx);
                pszc = szc;
                VM_STAT_ADD(pszclck_stat[2]);
                goto again;
        }

        VM_STAT_ADD(pszclck_stat[3]);
        /*
         * The current hat_page_demote() is not done yet.
         * Wait for it to finish.
         */
        mutex_exit(mtx);
        rootpp = PP_GROUPLEADER(rootpp, rszc);
        mtx = PAGE_SZC_MUTEX(rootpp);
        mutex_enter(mtx);
        mutex_exit(mtx);
        ASSERT(rootpp->p_szc < rszc);
        goto again;
}
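/*
 * Illustrative caller sketch (not part of the original source): block
 * hat_page_demote() while inspecting the large-page group that pp belongs
 * to.  A NULL return guarantees p_szc == 0, so there is nothing to
 * serialize against in that case.  The inspection itself is elided.
 */
static void
example_examine_large_page(page_t *pp)
{
        kmutex_t *szc_mtx;

        szc_mtx = page_szc_lock(pp);
        if (szc_mtx == NULL) {
                ASSERT(pp->p_szc == 0);
                return;
        }

        /* p_szc and the group leader are stable until the mutex is dropped */
        /* ... examine PP_GROUPLEADER(pp, pp->p_szc) ... */

        mutex_exit(szc_mtx);
}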
static void netbsd32_sendsig_sigcontext(const ksiginfo_t *ksi, const sigset_t *mask) { struct lwp *l = curlwp; struct proc *p = l->l_proc; struct trapframe *tf; int sig = ksi->ksi_signo; sig_t catcher = SIGACTION(p, sig).sa_handler; struct netbsd32_sigframe_sigcontext *fp, frame; int onstack, error; struct sigacts *ps = p->p_sigacts; tf = l->l_md.md_regs; /* Do we need to jump onto the signal stack? */ onstack = (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; /* Allocate space for the signal handler context. */ if (onstack) fp = (struct netbsd32_sigframe_sigcontext *) ((char *)l->l_sigstk.ss_sp + l->l_sigstk.ss_size); else fp = (struct netbsd32_sigframe_sigcontext *)tf->tf_rsp; fp--; /* Build stack frame for signal trampoline. */ switch (ps->sa_sigdesc[sig].sd_vers) { case 0: frame.sf_ra = (uint32_t)(u_long)p->p_sigctx.ps_sigcode; break; case 1: frame.sf_ra = (uint32_t)(u_long)ps->sa_sigdesc[sig].sd_tramp; break; default: /* Don't know what trampoline version; kill it. */ sigexit(l, SIGILL); } frame.sf_signum = sig; frame.sf_code = ksi->ksi_trap; frame.sf_scp = (uint32_t)(u_long)&fp->sf_sc; frame.sf_sc.sc_ds = tf->tf_ds; frame.sf_sc.sc_es = tf->tf_es; frame.sf_sc.sc_fs = tf->tf_fs; frame.sf_sc.sc_gs = tf->tf_gs; frame.sf_sc.sc_eflags = tf->tf_rflags; frame.sf_sc.sc_edi = tf->tf_rdi; frame.sf_sc.sc_esi = tf->tf_rsi; frame.sf_sc.sc_ebp = tf->tf_rbp; frame.sf_sc.sc_ebx = tf->tf_rbx; frame.sf_sc.sc_edx = tf->tf_rdx; frame.sf_sc.sc_ecx = tf->tf_rcx; frame.sf_sc.sc_eax = tf->tf_rax; frame.sf_sc.sc_eip = tf->tf_rip; frame.sf_sc.sc_cs = tf->tf_cs; frame.sf_sc.sc_esp = tf->tf_rsp; frame.sf_sc.sc_ss = tf->tf_ss; frame.sf_sc.sc_trapno = tf->tf_trapno; frame.sf_sc.sc_err = tf->tf_err; /* Save signal stack. */ frame.sf_sc.sc_onstack = l->l_sigstk.ss_flags & SS_ONSTACK; /* Save signal mask. */ frame.sf_sc.sc_mask = *mask; sendsig_reset(l, sig); mutex_exit(p->p_lock); error = copyout(&frame, fp, sizeof(frame)); mutex_enter(p->p_lock); if (error != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ sigexit(l, SIGILL); /* NOTREACHED */ } /* * Build context to run handler in. */ tf->tf_ds = GSEL(GUDATA32_SEL, SEL_UPL); tf->tf_es = GSEL(GUDATA32_SEL, SEL_UPL); tf->tf_fs = GSEL(GUDATA32_SEL, SEL_UPL); tf->tf_gs = GSEL(GUDATA32_SEL, SEL_UPL); /* Ensure FP state is reset, if FP is used. */ l->l_md.md_flags &= ~MDL_USEDFPU; tf->tf_rip = (uint64_t)catcher; tf->tf_cs = GSEL(GUCODE32_SEL, SEL_UPL); tf->tf_rflags &= ~PSL_CLEARSIG; tf->tf_rsp = (uint64_t)fp; tf->tf_ss = GSEL(GUDATA32_SEL, SEL_UPL); /* Remember that we're now on the signal stack. */ if (onstack) l->l_sigstk.ss_flags |= SS_ONSTACK; if ((vaddr_t)catcher >= VM_MAXUSER_ADDRESS32) { /* * process has given an invalid address for the * handler. Stop it, but do not do it before so * we can return the right info to userland (or in core dump) */ sigexit(l, SIGILL); /* NOTREACHED */ } }
static void netbsd32_sendsig_siginfo(const ksiginfo_t *ksi, const sigset_t *mask) { struct lwp *l = curlwp; struct proc *p = l->l_proc; struct sigacts *ps = p->p_sigacts; int onstack, error; int sig = ksi->ksi_signo; struct netbsd32_sigframe_siginfo *fp, frame; sig_t catcher = SIGACTION(p, sig).sa_handler; struct trapframe *tf = l->l_md.md_regs; /* Do we need to jump onto the signal stack? */ onstack = (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; /* Allocate space for the signal handler context. */ if (onstack) fp = (struct netbsd32_sigframe_siginfo *) ((char *)l->l_sigstk.ss_sp + l->l_sigstk.ss_size); else fp = (struct netbsd32_sigframe_siginfo *)tf->tf_rsp; fp--; /* Build stack frame for signal trampoline. */ switch (ps->sa_sigdesc[sig].sd_vers) { case 0: /* handled by sendsig_sigcontext */ case 1: /* handled by sendsig_sigcontext */ default: /* unknown version */ printf("nsendsig: bad version %d\n", ps->sa_sigdesc[sig].sd_vers); sigexit(l, SIGILL); case 2: break; } frame.sf_ra = (uint32_t)(uintptr_t)ps->sa_sigdesc[sig].sd_tramp; frame.sf_signum = sig; frame.sf_sip = (uint32_t)(uintptr_t)&fp->sf_si; frame.sf_ucp = (uint32_t)(uintptr_t)&fp->sf_uc; netbsd32_si_to_si32(&frame.sf_si, (const siginfo_t *)&ksi->ksi_info); frame.sf_uc.uc_flags = _UC_SIGMASK; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_link = (uint32_t)(uintptr_t)l->l_ctxlink; frame.sf_uc.uc_flags |= (l->l_sigstk.ss_flags & SS_ONSTACK) ? _UC_SETSTACK : _UC_CLRSTACK; memset(&frame.sf_uc.uc_stack, 0, sizeof(frame.sf_uc.uc_stack)); sendsig_reset(l, sig); mutex_exit(p->p_lock); cpu_getmcontext32(l, &frame.sf_uc.uc_mcontext, &frame.sf_uc.uc_flags); error = copyout(&frame, fp, sizeof(frame)); mutex_enter(p->p_lock); if (error != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ sigexit(l, SIGILL); /* NOTREACHED */ } /* * Build context to run handler in. */ tf->tf_ds = GSEL(GUDATA32_SEL, SEL_UPL); tf->tf_es = GSEL(GUDATA32_SEL, SEL_UPL); tf->tf_fs = GSEL(GUDATA32_SEL, SEL_UPL); tf->tf_gs = GSEL(GUDATA32_SEL, SEL_UPL); tf->tf_rip = (uint64_t)catcher; tf->tf_cs = GSEL(GUCODE32_SEL, SEL_UPL); tf->tf_rflags &= ~PSL_CLEARSIG; tf->tf_rsp = (uint64_t)fp; tf->tf_ss = GSEL(GUDATA32_SEL, SEL_UPL); /* Ensure FP state is reset, if FP is used. */ l->l_md.md_flags &= ~MDL_USEDFPU; /* Remember that we're now on the signal stack. */ if (onstack) l->l_sigstk.ss_flags |= SS_ONSTACK; if ((vaddr_t)catcher >= VM_MAXUSER_ADDRESS32) { /* * process has given an invalid address for the * handler. Stop it, but do not do it before so * we can return the right info to userland (or in core dump) */ sigexit(l, SIGILL); /* NOTREACHED */ } }
/* * Health monitor for a single interface. * * The secondary sends ping RPCs to the primary. * The primary just stores the results and updates its structures. */ static void rdc_health_thread(void *arg) { rdc_if_t *ip = (rdc_if_t *)arg; struct rdc_ping ping; struct rdc_ping6 ping6; struct timeval t; int down = 1; int ret, err; int sec = 0; char ifaddr[RDC_MAXADDR]; char r_ifaddr[RDC_MAXADDR]; uint16_t *sp; bcopy(ip->ifaddr.buf, ifaddr, ip->ifaddr.len); sp = (uint16_t *)ifaddr; *sp = htons(*sp); bcopy(ip->r_ifaddr.buf, r_ifaddr, ip->r_ifaddr.len); sp = (uint16_t *)r_ifaddr; *sp = htons(*sp); while ((ip->exiting != 1) && (net_exit != ATM_EXIT)) { delay(HZ); /* setup RPC timeout */ t.tv_sec = rdc_rpc_tmout; t.tv_usec = 0; if (ip->issecondary && !ip->no_ping) { if (ip->rpc_version < RDC_VERSION7) { bcopy(ip->r_ifaddr.buf, ping6.p_ifaddr, RDC_MAXADDR); /* primary ifaddr */ bcopy(ip->ifaddr.buf, ping6.s_ifaddr, RDC_MAXADDR); /* secondary ifaddr */ err = rdc_clnt_call_any(ip->srv, ip, RDCPROC_PING4, xdr_rdc_ping6, (char *)&ping6, xdr_int, (char *)&ret, &t); } else { ping.p_ifaddr.buf = r_ifaddr; ping.p_ifaddr.len = ip->r_ifaddr.len; ping.p_ifaddr.maxlen = ip->r_ifaddr.len; ping.s_ifaddr.buf = ifaddr; ping.s_ifaddr.len = ip->ifaddr.len; ping.s_ifaddr.maxlen = ip->ifaddr.len; err = rdc_clnt_call_any(ip->srv, ip, RDCPROC_PING4, xdr_rdc_ping, (char *)&ping, xdr_int, (char *)&ret, &t); } if (err || ret) { /* RPC failed - link is down */ if (!down && !ip->isprimary) { /* * don't print messages if also * a primary - the primary will * take care of it. */ rdc_if_down(ip); down = 1; } rdc_dump_alloc_bufs(ip); ip->no_ping = 1; /* * Start back at the max possible version * since the remote server could come back * on a different protocol version. */ mutex_enter(&rdc_ping_lock); ip->rpc_version = RDC_VERS_MAX; mutex_exit(&rdc_ping_lock); } else { if (down && !ip->isprimary) { /* * was failed, but now ok * * don't print messages if also * a primary - the primary will * take care of it. */ rdc_if_up(ip); down = 0; } } } if (!ip->isprimary && down && ++sec == 5) { sec = 0; rdc_dump_alloc_bufs(ip); } if (ip->isprimary) rdc_update_health(ip); } /* signal that this thread is done */ ip->exiting = 2; }
int compat_13_netbsd32_sigreturn(struct lwp *l, const struct compat_13_netbsd32_sigreturn_args *uap, register_t *retval) { /* { syscallarg(struct netbsd32_sigcontext13 *) sigcntxp; } */ struct proc *p = l->l_proc; struct netbsd32_sigcontext13 *scp, context; struct trapframe *tf; sigset_t mask; int error; /* * The trampoline code hands us the context. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ scp = (struct netbsd32_sigcontext13 *)NETBSD32PTR64(SCARG(uap, sigcntxp)); if (copyin((void *)scp, &context, sizeof(*scp)) != 0) return (EFAULT); /* Restore register context. */ tf = l->l_md.md_regs; /* * Check for security violations. */ error = check_sigcontext32(l, (const struct netbsd32_sigcontext *)&context); if (error != 0) return error; tf->tf_gs = context.sc_gs; tf->tf_fs = context.sc_fs; tf->tf_es = context.sc_es; tf->tf_ds = context.sc_ds; tf->tf_rflags = context.sc_eflags; tf->tf_rdi = context.sc_edi; tf->tf_rsi = context.sc_esi; tf->tf_rbp = context.sc_ebp; tf->tf_rbx = context.sc_ebx; tf->tf_rdx = context.sc_edx; tf->tf_rcx = context.sc_ecx; tf->tf_rax = context.sc_eax; tf->tf_rip = context.sc_eip; tf->tf_cs = context.sc_cs; tf->tf_rsp = context.sc_esp; tf->tf_ss = context.sc_ss; mutex_enter(p->p_lock); /* Restore signal stack. */ if (context.sc_onstack & SS_ONSTACK) l->l_sigstk.ss_flags |= SS_ONSTACK; else l->l_sigstk.ss_flags &= ~SS_ONSTACK; /* Restore signal mask. */ native_sigset13_to_sigset((sigset13_t *)&context.sc_mask, &mask); (void) sigprocmask1(l, SIG_SETMASK, &mask, 0); mutex_exit(p->p_lock); return (EJUSTRETURN); }
/* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int compat_13_sys_sigreturn(struct lwp *l, const struct compat_13_sys_sigreturn_args *uap, register_t *retval) { /* { syscallarg(struct sigcontext13 *) sigcntxp; } */ struct proc *p = l->l_proc; struct sigcontext13 *scp; struct frame *frame; struct sigcontext13 tsigc; sigset_t mask; /* * The trampoline code hands us the context. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ scp = SCARG(uap, sigcntxp); if ((int)scp & 1) return EINVAL; if (copyin(scp, &tsigc, sizeof(tsigc)) != 0) return EFAULT; scp = &tsigc; /* Make sure the user isn't pulling a fast one on us! */ if ((scp->sc_ps & (PSL_MBZ|PSL_IPL|PSL_S)) != 0) return EINVAL; /* Restore register context. */ frame = (struct frame *)l->l_md.md_regs; /* * We only support restoring the sigcontext13 in this call. * We are not called from the sigcode (per sendsig()), so * we will not have a sigstate to restore. */ if (scp->sc_ap != 0) return EINVAL; /* * Restore the user supplied information. * This should be at the last so that the error (EINVAL) * is reported to the sigreturn caller, not to the * jump destination. */ frame->f_regs[SP] = scp->sc_sp; frame->f_regs[A6] = scp->sc_fp; frame->f_pc = scp->sc_pc; frame->f_sr = scp->sc_ps; mutex_enter(p->p_lock); /* Restore signal stack. */ if (scp->sc_onstack & SS_ONSTACK) l->l_sigstk.ss_flags |= SS_ONSTACK; else l->l_sigstk.ss_flags &= ~SS_ONSTACK; /* Restore signal mask. */ native_sigset13_to_sigset(&scp->sc_mask, &mask); (void)sigprocmask1(l, SIG_SETMASK, &mask, 0); mutex_exit(p->p_lock); return EJUSTRETURN; }
void linux32_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) { struct lwp *l = curlwp; struct proc *p = l->l_proc; struct trapframe *tf; struct linux32_sigframe *fp, frame; int onstack, error; int sig = ksi->ksi_signo; sig_t catcher = SIGACTION(p, sig).sa_handler; struct sigaltstack *sas = &l->l_sigstk; tf = l->l_md.md_regs; /* Do we need to jump onto the signal stack? */ onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; /* Allocate space for the signal handler context. */ if (onstack) fp = (struct linux32_sigframe *)((char *)sas->ss_sp + sas->ss_size); else fp = (struct linux32_sigframe *)tf->tf_rsp; fp--; DPRINTF(("old: onstack = %d, fp = %p sig = %d rip = 0x%lx\n", onstack, fp, sig, tf->tf_rip)); /* Build stack frame for signal trampoline. */ NETBSD32PTR32(frame.sf_handler, catcher); frame.sf_sig = native_to_linux32_signo[sig]; linux32_save_sigcontext(l, tf, mask, &frame.sf_sc); sendsig_reset(l, sig); mutex_exit(p->p_lock); error = copyout(&frame, fp, sizeof(frame)); mutex_enter(p->p_lock); if (error != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ sigexit(l, SIGILL); /* NOTREACHED */ } /* * Build context to run handler in. */ tf->tf_fs = GSEL(GUDATA32_SEL, SEL_UPL) & 0xffffffff; tf->tf_es = GSEL(GUDATA32_SEL, SEL_UPL) & 0xffffffff; tf->tf_ds = GSEL(GUDATA32_SEL, SEL_UPL) & 0xffffffff; tf->tf_rip = ((long)p->p_sigctx.ps_sigcode) & 0xffffffff; tf->tf_cs = GSEL(GUCODE32_SEL, SEL_UPL) & 0xffffffff; tf->tf_rflags &= ~PSL_CLEARSIG & 0xffffffff; tf->tf_rsp = (long)fp & 0xffffffff; tf->tf_ss = GSEL(GUDATA32_SEL, SEL_UPL) & 0xffffffff; /* Remember that we're now on the signal stack. */ if (onstack) sas->ss_flags |= SS_ONSTACK; return; }
static int linux32_restore_sigcontext(struct lwp *l, struct linux32_sigcontext *scp, register_t *retval) { struct trapframe *tf; struct proc *p = l->l_proc; struct sigaltstack *sas = &l->l_sigstk; struct pcb *pcb; sigset_t mask; ssize_t ss_gap; register_t fssel, gssel; /* Restore register context. */ tf = l->l_md.md_regs; pcb = lwp_getpcb(l); DPRINTF(("sigreturn enter rsp=0x%lx rip=0x%lx\n", tf->tf_rsp, tf->tf_rip)); /* * Check for security violations. */ if (((scp->sc_eflags ^ tf->tf_rflags) & PSL_USERSTATIC) != 0 || !USERMODE(scp->sc_cs, scp->sc_eflags)) return EINVAL; if (scp->sc_fs != 0 && !VALID_USER_DSEL32(scp->sc_fs) && !(VALID_USER_FSEL32(scp->sc_fs) && pcb->pcb_fs != 0)) return EINVAL; if (scp->sc_gs != 0 && !VALID_USER_DSEL32(scp->sc_gs) && !(VALID_USER_GSEL32(scp->sc_gs) && pcb->pcb_gs != 0)) return EINVAL; if (scp->sc_es != 0 && !VALID_USER_DSEL32(scp->sc_es)) return EINVAL; if (!VALID_USER_DSEL32(scp->sc_ds) || !VALID_USER_DSEL32(scp->sc_ss)) return EINVAL; if (scp->sc_eip >= VM_MAXUSER_ADDRESS32) return EINVAL; gssel = (register_t)scp->sc_gs & 0xffff; fssel = (register_t)scp->sc_fs & 0xffff; cpu_fsgs_reload(l, fssel, gssel); tf->tf_es = (register_t)scp->sc_es & 0xffff; tf->tf_ds = (register_t)scp->sc_ds & 0xffff; tf->tf_rflags &= ~PSL_USER; tf->tf_rflags |= ((register_t)scp->sc_eflags & PSL_USER); tf->tf_rdi = (register_t)scp->sc_edi & 0xffffffff; tf->tf_rsi = (register_t)scp->sc_esi & 0xffffffff; tf->tf_rbp = (register_t)scp->sc_ebp & 0xffffffff; tf->tf_rbx = (register_t)scp->sc_ebx & 0xffffffff; tf->tf_rdx = (register_t)scp->sc_edx & 0xffffffff; tf->tf_rcx = (register_t)scp->sc_ecx & 0xffffffff; tf->tf_rax = (register_t)scp->sc_eax & 0xffffffff; tf->tf_rip = (register_t)scp->sc_eip & 0xffffffff; tf->tf_cs = (register_t)scp->sc_cs & 0xffff; tf->tf_rsp = (register_t)scp->sc_esp_at_signal & 0xffffffff; tf->tf_ss = (register_t)scp->sc_ss & 0xffff; mutex_enter(p->p_lock); /* Restore signal stack. */ ss_gap = (ssize_t) ((char *)NETBSD32IPTR64(scp->sc_esp_at_signal) - (char *)sas->ss_sp); if (ss_gap >= 0 && ss_gap < sas->ss_size) sas->ss_flags |= SS_ONSTACK; else sas->ss_flags &= ~SS_ONSTACK; /* Restore signal mask. */ linux32_old_to_native_sigset(&mask, &scp->sc_mask); (void) sigprocmask1(l, SIG_SETMASK, &mask, 0); mutex_exit(p->p_lock); DPRINTF(("linux32_sigreturn: rip = 0x%lx, rsp = 0x%lx, flags = 0x%lx\n", tf->tf_rip, tf->tf_rsp, tf->tf_rflags)); return EJUSTRETURN; }
/* * Unlink zp from dl, and mark zp for deletion if this was the last link. Can * fail if zp is a mount point (EBUSY) or a non-empty directory (ENOTEMPTY). * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list. * If it's non-NULL, we use it to indicate whether the znode needs deletion, * and it's the caller's job to do it. */ int zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, boolean_t *unlinkedp) { znode_t *dzp = dl->dl_dzp; zfs_sb_t *zsb = ZTOZSB(dzp); int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode); boolean_t unlinked = B_FALSE; sa_bulk_attr_t bulk[5]; uint64_t mtime[2], ctime[2]; int count = 0; int error; #ifdef HAVE_DNLC dnlc_remove(ZTOI(dzp), dl->dl_name); #endif /* HAVE_DNLC */ if (!(flag & ZRENAMING)) { mutex_enter(&zp->z_lock); if (zp_is_dir && !zfs_dirempty(zp)) { mutex_exit(&zp->z_lock); return (SET_ERROR(ENOTEMPTY)); } /* * If we get here, we are going to try to remove the object. * First try removing the name from the directory; if that * fails, return the error. */ error = zfs_dropname(dl, zp, dzp, tx, flag); if (error != 0) { mutex_exit(&zp->z_lock); return (error); } if (zp->z_links <= zp_is_dir) { zfs_panic_recover("zfs: link count on %lu is %u, " "should be at least %u", zp->z_id, (int)zp->z_links, zp_is_dir + 1); zp->z_links = zp_is_dir + 1; } if (--zp->z_links == zp_is_dir) { zp->z_unlinked = B_TRUE; zp->z_links = 0; unlinked = B_TRUE; } else { SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, sizeof (ctime)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, sizeof (zp->z_pflags)); zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, B_TRUE); } SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, &zp->z_links, sizeof (zp->z_links)); error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); count = 0; ASSERT(error == 0); mutex_exit(&zp->z_lock); } else { error = zfs_dropname(dl, zp, dzp, tx, flag); if (error != 0) return (error); } mutex_enter(&dzp->z_lock); dzp->z_size--; /* one dirent removed */ dzp->z_links -= zp_is_dir; /* ".." link from zp */ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, &dzp->z_links, sizeof (dzp->z_links)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &dzp->z_size, sizeof (dzp->z_size)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, ctime, sizeof (ctime)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, mtime, sizeof (mtime)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &dzp->z_pflags, sizeof (dzp->z_pflags)); zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); ASSERT(error == 0); mutex_exit(&dzp->z_lock); if (unlinkedp != NULL) *unlinkedp = unlinked; else if (unlinked) zfs_unlinked_add(zp, tx); return (0); }
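/*
 * Illustrative caller sketch (not part of the original source): when a
 * non-NULL 'unlinkedp' is passed, zfs_link_destroy() does not put the znode
 * on the unlinked set itself; per the comment above, that step becomes the
 * caller's job.  The surrounding transaction and dirlock setup are elided.
 */
static int
example_remove_link(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx)
{
        boolean_t unlinked = B_FALSE;
        int error;

        error = zfs_link_destroy(dl, zp, tx, 0, &unlinked);
        if (error == 0 && unlinked) {
                /* last link removed; the caller must queue the deletion */
                zfs_unlinked_add(zp, tx);
        }
        return (error);
}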
int cpu_setmcontext32(struct lwp *l, const mcontext32_t *mcp, unsigned int flags) { struct trapframe *tf = l->l_md.md_regs; const __greg32_t *gr = mcp->__gregs; struct proc *p = l->l_proc; int error; /* Restore register context, if any. */ if ((flags & _UC_CPU) != 0) { /* * Check for security violations. */ error = cpu_mcontext32_validate(l, mcp); if (error != 0) return error; cpu_fsgs_reload(l, gr[_REG32_FS], gr[_REG32_GS]); tf->tf_es = gr[_REG32_ES]; tf->tf_ds = gr[_REG32_DS]; /* Only change the user-alterable part of eflags */ tf->tf_rflags &= ~PSL_USER; tf->tf_rflags |= (gr[_REG32_EFL] & PSL_USER); tf->tf_rdi = gr[_REG32_EDI]; tf->tf_rsi = gr[_REG32_ESI]; tf->tf_rbp = gr[_REG32_EBP]; tf->tf_rbx = gr[_REG32_EBX]; tf->tf_rdx = gr[_REG32_EDX]; tf->tf_rcx = gr[_REG32_ECX]; tf->tf_rax = gr[_REG32_EAX]; tf->tf_rip = gr[_REG32_EIP]; tf->tf_cs = gr[_REG32_CS]; tf->tf_rsp = gr[_REG32_UESP]; tf->tf_ss = gr[_REG32_SS]; } if ((flags & _UC_TLSBASE) != 0) lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase); /* Restore floating point register context, if any. */ if ((flags & _UC_FPU) != 0) { struct pcb *pcb = lwp_getpcb(l); /* * If we were using the FPU, forget that we were. */ if (pcb->pcb_fpcpu != NULL) { fpusave_lwp(l, false); } memcpy(&pcb->pcb_savefpu.fp_fxsave, &mcp->__fpregs, sizeof (pcb->pcb_savefpu.fp_fxsave)); /* If not set already. */ l->l_md.md_flags |= MDL_USEDFPU; } mutex_enter(p->p_lock); if (flags & _UC_SETSTACK) l->l_sigstk.ss_flags |= SS_ONSTACK; if (flags & _UC_CLRSTACK) l->l_sigstk.ss_flags &= ~SS_ONSTACK; mutex_exit(p->p_lock); return (0); }
/* ARGSUSED */ int mfs_start(struct mount *mp, int flags) { struct vnode *vp; struct mfsnode *mfsp; struct proc *p; struct buf *bp; void *base; int sleepreturn = 0, refcnt, error; ksiginfoq_t kq; /* * Ensure that file system is still mounted when getting mfsnode. * Add a reference to the mfsnode to prevent it disappearing in * this routine. */ if ((error = vfs_busy(mp, NULL)) != 0) return error; vp = VFSTOUFS(mp)->um_devvp; mfsp = VTOMFS(vp); mutex_enter(&mfs_lock); mfsp->mfs_refcnt++; mutex_exit(&mfs_lock); vfs_unbusy(mp, false, NULL); base = mfsp->mfs_baseoff; mutex_enter(&mfs_lock); while (mfsp->mfs_shutdown != 1) { while ((bp = bufq_get(mfsp->mfs_buflist)) != NULL) { mutex_exit(&mfs_lock); mfs_doio(bp, base); mutex_enter(&mfs_lock); } /* * If a non-ignored signal is received, try to unmount. * If that fails, or the filesystem is already in the * process of being unmounted, clear the signal (it has been * "processed"), otherwise we will loop here, as tsleep * will always return EINTR/ERESTART. */ if (sleepreturn != 0) { mutex_exit(&mfs_lock); if (dounmount(mp, 0, curlwp) != 0) { p = curproc; ksiginfo_queue_init(&kq); mutex_enter(p->p_lock); sigclearall(p, NULL, &kq); mutex_exit(p->p_lock); ksiginfo_queue_drain(&kq); } sleepreturn = 0; mutex_enter(&mfs_lock); continue; } sleepreturn = cv_wait_sig(&mfsp->mfs_cv, &mfs_lock); } KASSERT(bufq_peek(mfsp->mfs_buflist) == NULL); refcnt = --mfsp->mfs_refcnt; mutex_exit(&mfs_lock); if (refcnt == 0) { bufq_free(mfsp->mfs_buflist); cv_destroy(&mfsp->mfs_cv); kmem_free(mfsp, sizeof(*mfsp)); } return (sleepreturn); }
/*ARGSUSED*/ int dump_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, int *rvalp) { uint64_t size; uint64_t dumpsize_in_pages; int error = 0; char *pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP); char uuidbuf[36 + 1]; size_t len; vnode_t *vp; switch (cmd) { case DIOCGETDUMPSIZE: if (dump_conflags & DUMP_ALL) size = ptob((uint64_t)physmem) / DUMP_COMPRESS_RATIO; else { /* * We can't give a good answer for the DUMP_CURPROC * because we won't know which process to use until it * causes a panic. We'll therefore punt and give the * caller the size for the kernel. * * This kernel size equation takes care of the * boot time kernel footprint and also accounts * for availrmem changes due to user explicit locking. * Refer to common/vm/vm_page.c for an explanation * of these counters. */ dumpsize_in_pages = (physinstalled - obp_pages - availrmem - anon_segkp_pages_locked - k_anoninfo.ani_mem_resv - pages_locked - pages_claimed - pages_useclaim); /* * Protect against vm vagaries. */ if (dumpsize_in_pages > (uint64_t)physmem) dumpsize_in_pages = (uint64_t)physmem; size = ptob(dumpsize_in_pages) / DUMP_COMPRESS_RATIO; } if (copyout(&size, (void *)arg, sizeof (size)) < 0) error = EFAULT; break; case DIOCGETCONF: mutex_enter(&dump_lock); *rvalp = dump_conflags; if (dumpvp && !(dumpvp->v_flag & VISSWAP)) *rvalp |= DUMP_EXCL; mutex_exit(&dump_lock); break; case DIOCSETCONF: mutex_enter(&dump_lock); if (arg == DUMP_KERNEL || arg == DUMP_ALL || arg == DUMP_CURPROC) dump_conflags = arg; else error = EINVAL; mutex_exit(&dump_lock); break; case DIOCGETDEV: mutex_enter(&dump_lock); if (dumppath == NULL) { mutex_exit(&dump_lock); error = ENODEV; break; } (void) strcpy(pathbuf, dumppath); mutex_exit(&dump_lock); error = copyoutstr(pathbuf, (void *)arg, MAXPATHLEN, NULL); break; case DIOCSETDEV: case DIOCTRYDEV: if ((error = copyinstr((char *)arg, pathbuf, MAXPATHLEN, NULL)) != 0 || (error = lookupname(pathbuf, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) break; mutex_enter(&dump_lock); if (vp->v_type == VBLK) error = dumpinit(vp, pathbuf, cmd == DIOCTRYDEV); else error = ENOTBLK; mutex_exit(&dump_lock); VN_RELE(vp); break; case DIOCDUMP: mutex_enter(&dump_lock); if (dumpvp == NULL) error = ENODEV; else if (dumpvp->v_flag & VISSWAP) error = EBUSY; else dumpsys(); mutex_exit(&dump_lock); break; case DIOCSETUUID: if ((error = copyinstr((char *)arg, uuidbuf, sizeof (uuidbuf), &len)) != 0) break; if (len != 37) { error = EINVAL; break; } error = dump_set_uuid(uuidbuf); break; case DIOCGETUUID: error = copyoutstr(dump_get_uuid(), (void *)arg, 37, NULL); break; case DIOCRMDEV: mutex_enter(&dump_lock); if (dumpvp != NULL) dumpfini(); mutex_exit(&dump_lock); break; default: error = ENXIO; } kmem_free(pathbuf, MAXPATHLEN); return (error); }
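/*
 * Illustrative user-level consumer (not part of the original source) of the
 * ioctls handled above.  The /dev/dump path and the header names are
 * assumptions following common illumos convention; DIOCGETDUMPSIZE returns
 * the compressed-size estimate computed in the DIOCGETDUMPSIZE case of
 * dump_ioctl().
 */
#include <sys/types.h>
#include <sys/dumpadm.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        uint64_t size;
        int fd = open("/dev/dump", O_RDONLY);

        if (fd < 0 || ioctl(fd, DIOCGETDUMPSIZE, &size) != 0) {
                perror("DIOCGETDUMPSIZE");
                return (1);
        }
        (void) printf("estimated dump size: %llu bytes\n",
            (unsigned long long)size);
        (void) close(fd);
        return (0);
}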
/* * Read the comments inside of page_lock_es() carefully. * * SE_EXCL callers specifying es == SE_EXCL_WANTED will cause the * SE_EWANTED bit of p_selock to be set when the lock cannot be obtained. * This is used by threads subject to reader-starvation (eg. memory delete). * * When a thread using SE_EXCL_WANTED does not obtain the SE_EXCL lock, * it is expected that it will retry at a later time. Threads that will * not retry the lock *must* call page_lock_clr_exclwanted to clear the * SE_EWANTED bit. (When a thread using SE_EXCL_WANTED obtains the lock, * the bit is cleared.) */ int page_try_reclaim_lock(page_t *pp, se_t se, int es) { kmutex_t *pse = PAGE_SE_MUTEX(pp); selock_t old; mutex_enter(pse); old = pp->p_selock; ASSERT(((es & SE_EXCL_WANTED) == 0) || ((es & SE_EXCL_WANTED) && (se == SE_EXCL))); if (PP_RETIRED(pp) && !(es & SE_RETIRED)) { mutex_exit(pse); VM_STAT_ADD(page_trylock_failed); return (0); } if (se == SE_SHARED && es == 1 && old == 0) { se = SE_EXCL; } if (se == SE_SHARED) { if (!PP_ISFREE(pp)) { if (old >= 0) { /* * Readers are not allowed when excl wanted */ if ((old & SE_EWANTED) == 0) { pp->p_selock = old + SE_READER; mutex_exit(pse); return (1); } } mutex_exit(pse); return (0); } /* * The page is free, so we really want SE_EXCL (below) */ VM_STAT_ADD(page_try_reclaim_upgrade); } /* * The caller wants a writer lock. We try for it only if * SE_EWANTED is not set, or if the caller specified * SE_EXCL_WANTED. */ if (!(old & SE_EWANTED) || (es & SE_EXCL_WANTED)) { if ((old & ~SE_EWANTED) == 0) { /* no reader/writer lock held */ THREAD_KPRI_REQUEST(); /* this clears out our setting of the SE_EWANTED bit */ pp->p_selock = SE_WRITER; mutex_exit(pse); return (1); } } if (es & SE_EXCL_WANTED) { /* page is locked, set the SE_EWANTED bit */ pp->p_selock |= SE_EWANTED; } mutex_exit(pse); return (0); }
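/*
 * Illustrative caller sketch (not part of the original source): a thread
 * that uses SE_EXCL_WANTED but then decides not to retry must clear the
 * SE_EWANTED bit it may have left behind, as the comment above requires.
 * The give-up condition and the use of page_unlock() are assumptions for
 * illustration.
 */
static int
example_try_writer_lock(page_t *pp, int will_retry_later)
{
        if (!page_try_reclaim_lock(pp, SE_EXCL, SE_EXCL_WANTED)) {
                if (!will_retry_later) {
                        /* giving up: do not leave SE_EWANTED set */
                        page_lock_clr_exclwanted(pp);
                }
                return (0);
        }

        /* writer lock held; the grant cleared SE_EWANTED for us */
        /* ... */
        page_unlock(pp);
        return (1);
}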
/* * dcopy_cmd_poll() */ int dcopy_cmd_poll(dcopy_cmd_t cmd, int flags) { dcopy_handle_t channel; dcopy_cmd_priv_t priv; int e; priv = cmd->dp_private; channel = priv->pr_channel; /* * if the caller is trying to block, they needed to post the * command with DCOPY_CMD_INTR set. */ if ((flags & DCOPY_POLL_BLOCK) && !(cmd->dp_flags & DCOPY_CMD_INTR)) { return (DCOPY_FAILURE); } atomic_inc_64(&channel->ch_stat.cs_cmd_poll.value.ui64); repoll: e = channel->ch_cb->cb_cmd_poll(channel->ch_channel_private, cmd); if (e == DCOPY_PENDING) { /* * if the command is still active, and the blocking flag * is set. */ if (flags & DCOPY_POLL_BLOCK) { /* * if we haven't initialized the state, do it now. A * command can be re-used, so it's possible it's * already been initialized. */ if (!priv->pr_block_init) { priv->pr_block_init = B_TRUE; mutex_init(&priv->pr_mutex, NULL, MUTEX_DRIVER, NULL); cv_init(&priv->pr_cv, NULL, CV_DRIVER, NULL); priv->pr_cmd = cmd; } /* push it on the list for blocking commands */ priv->pr_wait = B_TRUE; dcopy_list_push(&channel->ch_poll_list, priv); mutex_enter(&priv->pr_mutex); /* * it's possible we already cleared pr_wait before we * grabbed the mutex. */ if (priv->pr_wait) { cv_wait(&priv->pr_cv, &priv->pr_mutex); } mutex_exit(&priv->pr_mutex); /* * the command has completed, go back and poll so we * get the status. */ goto repoll; } } return (e); }
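/*
 * Illustrative caller sketch (not part of the original source): a command
 * may only be waited on with DCOPY_POLL_BLOCK if it was posted with
 * DCOPY_CMD_INTR set, otherwise dcopy_cmd_poll() fails immediately (see the
 * check at the top of the function above).  The dcopy_cmd_post() routine is
 * assumed here; it is not shown in this file.
 */
static void
example_post_and_wait(dcopy_cmd_t cmd)
{
        cmd->dp_flags |= DCOPY_CMD_INTR;        /* allow a blocking poll */
        if (dcopy_cmd_post(cmd) == DCOPY_SUCCESS) {
                /* sleeps on pr_cv until the channel completes the command */
                (void) dcopy_cmd_poll(cmd, DCOPY_POLL_BLOCK);
        }
}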
static void auvia_attach(device_t parent, device_t self, void *aux) { struct pci_attach_args *pa; struct auvia_softc *sc; const char *intrstr; pci_chipset_tag_t pc; pcitag_t pt; pci_intr_handle_t ih; pcireg_t pr; int r; const char *revnum; /* VT823xx revision number */ char intrbuf[PCI_INTRSTR_LEN]; pa = aux; sc = device_private(self); sc->sc_dev = self; intrstr = NULL; pc = pa->pa_pc; pt = pa->pa_tag; revnum = NULL; aprint_naive(": Audio controller\n"); sc->sc_play.sc_base = AUVIA_PLAY_BASE; sc->sc_record.sc_base = AUVIA_RECORD_BASE; if (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_VIATECH_VT8233_AC97) { sc->sc_flags |= AUVIA_FLAGS_VT8233; sc->sc_play.sc_base = VIA8233_MP_BASE; sc->sc_record.sc_base = VIA8233_WR_BASE; } if (pci_mapreg_map(pa, 0x10, PCI_MAPREG_TYPE_IO, 0, &sc->sc_iot, &sc->sc_ioh, NULL, &sc->sc_iosize)) { aprint_error(": can't map i/o space\n"); return; } sc->sc_dmat = pa->pa_dmat; sc->sc_pc = pc; sc->sc_pt = pt; r = PCI_REVISION(pa->pa_class); if (sc->sc_flags & AUVIA_FLAGS_VT8233) { snprintf(sc->sc_revision, sizeof(sc->sc_revision), "0x%02X", r); switch(r) { case VIA_REV_8233PRE: /* same as 8233, but should not be in the market */ revnum = "3-Pre"; break; case VIA_REV_8233C: /* 2 rec, 4 pb, 1 multi-pb */ revnum = "3C"; break; case VIA_REV_8233: /* 2 rec, 4 pb, 1 multi-pb, spdif */ revnum = "3"; break; case VIA_REV_8233A: /* 1 rec, 1 multi-pb, spdif */ revnum = "3A"; break; default: break; } if (r >= VIA_REV_8237) revnum = "7"; else if (r >= VIA_REV_8235) /* 2 rec, 4 pb, 1 multi-pb, spdif */ revnum = "5"; aprint_normal(": VIA Technologies VT823%s AC'97 Audio " "(rev %s)\n", revnum, sc->sc_revision); } else { sc->sc_revision[1] = '\0'; if (r == 0x20) { sc->sc_revision[0] = 'H'; } else if ((r >= 0x10) && (r <= 0x14)) { sc->sc_revision[0] = 'A' + (r - 0x10); } else { snprintf(sc->sc_revision, sizeof(sc->sc_revision), "0x%02X", r); } aprint_normal(": VIA Technologies VT82C686A AC'97 Audio " "(rev %s)\n", sc->sc_revision); } if (pci_intr_map(pa, &ih)) { aprint_error(": couldn't map interrupt\n"); bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_iosize); return; } intrstr = pci_intr_string(pc, ih, intrbuf, sizeof(intrbuf)); mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE); mutex_init(&sc->sc_intr_lock, MUTEX_DEFAULT, IPL_AUDIO); sc->sc_ih = pci_intr_establish(pc, ih, IPL_AUDIO, auvia_intr, sc); if (sc->sc_ih == NULL) { aprint_error_dev(sc->sc_dev, "couldn't establish interrupt"); if (intrstr != NULL) aprint_error(" at %s", intrstr); aprint_error("\n"); bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_iosize); mutex_destroy(&sc->sc_lock); mutex_destroy(&sc->sc_intr_lock); return; } aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr); /* disable SBPro compat & others */ pr = pci_conf_read(pc, pt, AUVIA_PCICONF_JUNK); pr &= ~AUVIA_PCICONF_ENABLES; /* clear compat function enables */ /* XXX what to do about MIDI, FM, joystick? 
*/ pr |= (AUVIA_PCICONF_ACLINKENAB | AUVIA_PCICONF_ACNOTRST | AUVIA_PCICONF_ACVSR | AUVIA_PCICONF_ACSGD); pr &= ~(AUVIA_PCICONF_ACFM | AUVIA_PCICONF_ACSB); pci_conf_write(pc, pt, AUVIA_PCICONF_JUNK, pr); sc->host_if.arg = sc; sc->host_if.attach = auvia_attach_codec; sc->host_if.read = auvia_read_codec; sc->host_if.write = auvia_write_codec; sc->host_if.reset = auvia_reset_codec; sc->host_if.spdif_event = auvia_spdif_event; if ((r = ac97_attach(&sc->host_if, self, &sc->sc_lock)) != 0) { aprint_error_dev(sc->sc_dev, "can't attach codec (error 0x%X)\n", r); pci_intr_disestablish(pc, sc->sc_ih); bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_iosize); mutex_destroy(&sc->sc_lock); mutex_destroy(&sc->sc_intr_lock); return; } /* setup audio_format */ memcpy(sc->sc_formats, auvia_formats, sizeof(auvia_formats)); mutex_enter(&sc->sc_lock); if (sc->sc_play.sc_base != VIA8233_MP_BASE || !AC97_IS_4CH(sc->codec_if)) { AUFMT_INVALIDATE(&sc->sc_formats[AUVIA_FORMATS_4CH_8]); AUFMT_INVALIDATE(&sc->sc_formats[AUVIA_FORMATS_4CH_16]); } if (sc->sc_play.sc_base != VIA8233_MP_BASE || !AC97_IS_6CH(sc->codec_if)) { AUFMT_INVALIDATE(&sc->sc_formats[AUVIA_FORMATS_6CH_8]); AUFMT_INVALIDATE(&sc->sc_formats[AUVIA_FORMATS_6CH_16]); } if (AC97_IS_FIXED_RATE(sc->codec_if)) { for (r = 0; r < AUVIA_NFORMATS; r++) { sc->sc_formats[r].frequency_type = 1; sc->sc_formats[r].frequency[0] = 48000; } } mutex_exit(&sc->sc_lock); if (0 != auconv_create_encodings(sc->sc_formats, AUVIA_NFORMATS, &sc->sc_encodings)) { mutex_enter(&sc->sc_lock); sc->codec_if->vtbl->detach(sc->codec_if); mutex_exit(&sc->sc_lock); pci_intr_disestablish(pc, sc->sc_ih); bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_iosize); mutex_destroy(&sc->sc_lock); mutex_destroy(&sc->sc_intr_lock); aprint_error_dev(sc->sc_dev, "can't create encodings\n"); return; } if (0 != auconv_create_encodings(auvia_spdif_formats, AUVIA_SPDIF_NFORMATS, &sc->sc_spdif_encodings)) { mutex_enter(&sc->sc_lock); sc->codec_if->vtbl->detach(sc->codec_if); mutex_exit(&sc->sc_lock); pci_intr_disestablish(pc, sc->sc_ih); bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_iosize); mutex_destroy(&sc->sc_lock); mutex_destroy(&sc->sc_intr_lock); aprint_error_dev(sc->sc_dev, "can't create spdif encodings\n"); return; } if (!pmf_device_register(self, NULL, auvia_resume)) aprint_error_dev(self, "couldn't establish power handler\n"); audio_attach_mi(&auvia_hw_if, sc, sc->sc_dev); mutex_enter(&sc->sc_lock); sc->codec_if->vtbl->unlock(sc->codec_if); mutex_exit(&sc->sc_lock); return; }
/*
 * dcopy_device_register()
 */
int
dcopy_device_register(void *device_private, dcopy_device_info_t *info,
    dcopy_device_handle_t *handle)
{
	struct dcopy_channel_s *channel;
	struct dcopy_device_s *device;
	int e;
	int i;

	/* initialize the per device state */
	device = kmem_zalloc(sizeof (*device), KM_SLEEP);
	device->dc_device_private = device_private;
	device->dc_info = *info;
	device->dc_removing_cnt = 0;
	device->dc_cb = info->di_cb;

	/*
	 * we have a per device channel list so we can remove a device in the
	 * future.
	 */
	e = dcopy_list_init(&device->dc_devchan_list,
	    sizeof (struct dcopy_channel_s),
	    offsetof(struct dcopy_channel_s, ch_devchan_list_node));
	if (e != DCOPY_SUCCESS) {
		goto registerfail_devchan;
	}

	/*
	 * allocate state for each channel, allocate the channel, and then add
	 * the devices dma channels to the devices channel list.
	 */
	for (i = 0; i < info->di_num_dma; i++) {
		channel = kmem_zalloc(sizeof (*channel), KM_SLEEP);
		channel->ch_device = device;
		channel->ch_removing = B_FALSE;
		channel->ch_ref_cnt = 0;
		channel->ch_cb = info->di_cb;

		e = info->di_cb->cb_channel_alloc(device_private, channel,
		    DCOPY_SLEEP, dcopy_channel_size, &channel->ch_info,
		    &channel->ch_channel_private);
		if (e != DCOPY_SUCCESS) {
			kmem_free(channel, sizeof (*channel));
			goto registerfail_alloc;
		}

		e = dcopy_stats_init(channel);
		if (e != DCOPY_SUCCESS) {
			info->di_cb->cb_channel_free(
			    &channel->ch_channel_private);
			kmem_free(channel, sizeof (*channel));
			goto registerfail_alloc;
		}

		e = dcopy_list_init(&channel->ch_poll_list,
		    sizeof (struct dcopy_cmd_priv_s),
		    offsetof(struct dcopy_cmd_priv_s, pr_poll_list_node));
		if (e != DCOPY_SUCCESS) {
			dcopy_stats_fini(channel);
			info->di_cb->cb_channel_free(
			    &channel->ch_channel_private);
			kmem_free(channel, sizeof (*channel));
			goto registerfail_alloc;
		}

		dcopy_list_push(&device->dc_devchan_list, channel);
	}

	/* add the device to device list */
	dcopy_list_push(&dcopy_statep->d_device_list, device);

	/*
	 * add the device's dma channels to the global channel list (where
	 * dcopy_alloc's come from)
	 */
	mutex_enter(&dcopy_statep->d_globalchan_list.dl_mutex);
	mutex_enter(&dcopy_statep->d_device_list.dl_mutex);
	channel = list_head(&device->dc_devchan_list.dl_list);
	while (channel != NULL) {
		list_insert_tail(&dcopy_statep->d_globalchan_list.dl_list,
		    channel);
		dcopy_statep->d_globalchan_list.dl_cnt++;
		channel = list_next(&device->dc_devchan_list.dl_list, channel);
	}
	mutex_exit(&dcopy_statep->d_device_list.dl_mutex);
	mutex_exit(&dcopy_statep->d_globalchan_list.dl_mutex);

	*handle = device;

	/* last call-back into kernel for dcopy KAPI enabled */
	uioa_dcopy_enable();

	return (DCOPY_SUCCESS);

registerfail_alloc:
	channel = list_head(&device->dc_devchan_list.dl_list);
	while (channel != NULL) {
		/* remove from the list */
		channel = dcopy_list_pop(&device->dc_devchan_list);
		ASSERT(channel != NULL);

		dcopy_list_fini(&channel->ch_poll_list);
		dcopy_stats_fini(channel);
		info->di_cb->cb_channel_free(&channel->ch_channel_private);
		kmem_free(channel, sizeof (*channel));
	}

	dcopy_list_fini(&device->dc_devchan_list);

registerfail_devchan:
	kmem_free(device, sizeof (*device));

	return (DCOPY_FAILURE);
}
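/*
 * dcopy_device_register() builds its per-device state in stages and
 * unwinds in reverse order through the registerfail_* labels when any
 * stage fails: channels already pushed onto the device list are popped,
 * torn down, and freed before the device itself is released.  A
 * self-contained sketch of that goto-unwind idiom follows; the channel
 * and buffer names are hypothetical, not part of the dcopy KAPI.
 */
#include <stdlib.h>
#include <errno.h>

struct chan {
	void *buf;
};

/*
 * Allocate n channels; if any step fails, unwind only the channels that
 * were already built, mirroring the registerfail_* labels above.
 */
static int
setup_channels(struct chan *ch, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		ch[i].buf = malloc(4096);
		if (ch[i].buf == NULL)
			goto fail;
	}
	return (0);

fail:
	while (--i >= 0) {
		free(ch[i].buf);
		ch[i].buf = NULL;
	}
	return (ENOMEM);
}

int
main(void)
{
	struct chan ch[4];

	return (setup_channels(ch, 4));
}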
/* ARGSUSED */
static int
notify_ioctl(dev_t dev, int icmd, void *ioctl_in, int mode, IOLOCK *lockp)
{
	int cmd;
	pid_t pid;
	md_event_queue_t *event_queue;
	md_event_t *event;
	cred_t *credp;
	char *q_name;
	int err = 0;
	size_t sz = 0;
	md_event_ioctl_t *ioctl;

	sz = sizeof (*ioctl);
	ioctl = kmem_zalloc(sz, KM_SLEEP);

	if (ddi_copyin(ioctl_in, (void *)ioctl, sz, mode)) {
		err = EFAULT;
		goto out;
	}

	if (ioctl->mdn_rev != MD_NOTIFY_REVISION) {
		err = EINVAL;
		goto out;
	}
	if (ioctl->mdn_magic != MD_EVENT_ID) {
		err = EINVAL;
		goto out;
	}

	pid = md_getpid();
	cmd = ioctl->mdn_cmd;
	q_name = ioctl->mdn_name;

	if (((cmd != EQ_OFF) && (cmd != EQ_ON)) && (md_reap >= md_reap_count))
		md_reaper();

	if ((cmd != EQ_ON) && (cmd != EQ_PUT)) {
		mutex_enter(&md_eventq_mx);
		if ((event_queue = md_find_event_queue(q_name, 0)) == NULL) {
			mutex_exit(&md_eventq_mx);
			(void) notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			err = ENOENT;
			goto out;
		}
	}

	switch (cmd) {
	case EQ_ON:
		md_reaper();

		mutex_enter(&md_eventq_mx);
		if (md_find_event_queue(q_name, 0) != NULL) {
			mutex_exit(&md_eventq_mx);
			err = EEXIST;
			break;
		}

		/* allocate and initialize queue head */
		event_queue = (md_event_queue_t *)
		    kmem_alloc(sizeof (md_event_queue_t), KM_NOSLEEP);
		if (event_queue == NULL) {
			mutex_exit(&md_eventq_mx);
			err = ENOMEM;
			break;
		}

		cv_init(&event_queue->mdn_cv, NULL, CV_DEFAULT, NULL);

		event_queue->mdn_flags = 0;
		event_queue->mdn_pid = pid;
		event_queue->mdn_proc = md_getproc();
		event_queue->mdn_size = 0;
		event_queue->mdn_front = NULL;
		event_queue->mdn_tail = NULL;
		event_queue->mdn_waiting = 0;
		event_queue->mdn_nextq = NULL;
		credp = ddi_get_cred();
		event_queue->mdn_uid = crgetuid(credp);
		bcopy(q_name, event_queue->mdn_name, MD_NOTIFY_NAME_SIZE);
		if (ioctl->mdn_flags & EQ_Q_PERM)
			event_queue->mdn_flags |= MD_EVENT_QUEUE_PERM;

		/* link into the list of event queues */
		if (md_event_queue != NULL)
			event_queue->mdn_nextq = md_event_queue;
		md_event_queue = event_queue;

		mutex_exit(&md_eventq_mx);
		err = 0;
		break;

	case EQ_OFF:
		if (md_event_queue == NULL)
			return (ENOENT);

		event_queue->mdn_flags = MD_EVENT_QUEUE_DESTROY;
		event_queue->mdn_pid = 0;
		event_queue->mdn_proc = NULL;

		if (event_queue->mdn_waiting != 0)
			cv_broadcast(&event_queue->mdn_cv);

		/*
		 * force the reaper to delete this when it has no process
		 * waiting on it.
		 */
		mutex_exit(&md_eventq_mx);
		md_reaper();
		err = 0;
		break;

	case EQ_GET_NOWAIT:
	case EQ_GET_WAIT:
		if (cmd == EQ_GET_WAIT) {
			err = md_wait_for_event(event_queue, ioctl_in, ioctl,
			    sz, mode, lockp);
			if (err == EINTR)
				goto out;
		}
		ASSERT(MUTEX_HELD(&md_eventq_mx));
		if (event_queue->mdn_flags &
		    (MD_EVENT_QUEUE_INVALID | MD_EVENT_QUEUE_FULL)) {
			event_queue->mdn_flags &=
			    ~(MD_EVENT_QUEUE_INVALID | MD_EVENT_QUEUE_FULL);
			mutex_exit(&md_eventq_mx);
			err = notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			ioctl->mdn_event = EQ_NOTIFY_LOST;
			err = ddi_copyout((void *)ioctl, ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
			goto out;
		}
		if (event_queue->mdn_front != NULL) {
			event = event_queue->mdn_front;
			event_queue->mdn_front = event->mdn_next;
			event_queue->mdn_size--;
			if (event_queue->mdn_front == NULL)
				event_queue->mdn_tail = NULL;
			mutex_exit(&md_eventq_mx);
			ioctl->mdn_tag = event->mdn_tag;
			ioctl->mdn_set = event->mdn_set;
			ioctl->mdn_dev = event->mdn_dev;
			ioctl->mdn_event = event->mdn_event;
			ioctl->mdn_user = event->mdn_user;
			ioctl->mdn_time.tv_sec = event->mdn_time.tv_sec;
			ioctl->mdn_time.tv_usec = event->mdn_time.tv_usec;
			kmem_free(event, sizeof (md_event_t));
			err = ddi_copyout((void *)ioctl, ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
			goto out;
		} else {
			/* no elements on queue */
			mutex_exit(&md_eventq_mx);
			err = notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
		}

		if (cmd == EQ_GET_NOWAIT)
			err = EAGAIN;
		goto out;

	case EQ_PUT:
		if (!md_event_queue) {
			err = ENOENT;
			break;
		}
		md_put_event(ioctl->mdn_tag, ioctl->mdn_set, ioctl->mdn_dev,
		    ioctl->mdn_event, ioctl->mdn_user);
		err = 0;
		goto out;

	default:
		err = EINVAL;
		goto out;
	}

out:
	kmem_free(ioctl, sz);
	return (err);
}
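/*
 * notify_ioctl() follows the usual ioctl shape: copy the argument
 * structure in, validate its revision and magic before trusting any
 * field, dispatch on the command, then copy results back and free the
 * scratch buffer through the out label.  The sketch below models only
 * the copy-in-and-validate step in user space; the struct layout and
 * constants are illustrative, not the md_event_ioctl_t definitions.
 */
#include <string.h>
#include <errno.h>

#define REQ_REVISION	1
#define REQ_MAGIC	0x4d444556

struct req {
	int rev;
	int magic;
	int cmd;
};

/* Model of ddi_copyin() + validation: reject a request with a bad header. */
static int
req_validate(const void *user_buf, struct req *out)
{
	memcpy(out, user_buf, sizeof (*out));	/* stands in for ddi_copyin() */
	if (out->rev != REQ_REVISION)
		return (EINVAL);
	if (out->magic != REQ_MAGIC)
		return (EINVAL);
	return (0);
}

int
main(void)
{
	struct req in = { REQ_REVISION, REQ_MAGIC, 0 }, out;

	return (req_validate(&in, &out));
}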
/*
 * Lock a directory entry.  A dirlock on <dzp, name> protects that name
 * in dzp's directory zap object.  As long as you hold a dirlock, you can
 * assume two things: (1) dzp cannot be reaped, and (2) no other thread
 * can change the zap entry for (i.e. link or unlink) this name.
 *
 * Input arguments:
 *	dzp	- znode for directory
 *	name	- name of entry to lock
 *	flag	- ZNEW: if the entry already exists, fail with EEXIST.
 *		  ZEXISTS: if the entry does not exist, fail with ENOENT.
 *		  ZSHARED: allow concurrent access with other ZSHARED callers.
 *		  ZXATTR: we want dzp's xattr directory
 *		  ZCILOOK: On a mixed sensitivity file system,
 *			   this lookup should be case-insensitive.
 *		  ZCIEXACT: On a purely case-insensitive file system,
 *			    this lookup should be case-sensitive.
 *		  ZRENAMING: we are locking for renaming, force narrow locks
 *		  ZHAVELOCK: Don't grab the z_name_lock for this call. The
 *			     current thread already holds it.
 *
 * Output arguments:
 *	zpp	- pointer to the znode for the entry (NULL if there isn't one)
 *	dlpp	- pointer to the dirlock for this entry (NULL on error)
 *	direntflags - (case-insensitive lookup only)
 *		flags if multiple case-sensitive matches exist in directory
 *	realpnp	- (case-insensitive lookup only)
 *		actual name matched within the directory
 *
 * Return value: 0 on success or errno on failure.
 *
 * NOTE: Always checks for, and rejects, '.' and '..'.
 * NOTE: For case-insensitive file systems we take wide locks (see below),
 *	 but return znode pointers to a single match.
 */
int
zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name,
    znode_t **zpp, int flag, int *direntflags, pathname_t *realpnp)
{
	zfs_sb_t *zsb = ZTOZSB(dzp);
	zfs_dirlock_t *dl;
	boolean_t update;
	boolean_t exact;
	uint64_t zoid;
#ifdef HAVE_DNLC
	vnode_t *vp = NULL;
#endif /* HAVE_DNLC */
	int error = 0;
	int cmpflags;

	*zpp = NULL;
	*dlpp = NULL;

	/*
	 * Verify that we are not trying to lock '.', '..', or '.zfs'
	 */
	if ((name[0] == '.' &&
	    (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) ||
	    (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0))
		return (SET_ERROR(EEXIST));

	/*
	 * Case sensitivity and normalization preferences are set when
	 * the file system is created.  These are stored in the
	 * zsb->z_case and zsb->z_norm fields.  These choices
	 * affect what vnodes can be cached in the DNLC, how we
	 * perform zap lookups, and the "width" of our dirlocks.
	 *
	 * A normal dirlock locks a single name.  Note that with
	 * normalization a name can be composed multiple ways, but
	 * when normalized, these names all compare equal.  A wide
	 * dirlock locks multiple names.  We need these when the file
	 * system is supporting mixed-mode access.  It is sometimes
	 * necessary to lock all case permutations of file name at
	 * once so that simultaneous case-insensitive/case-sensitive
	 * behaves as rationally as possible.
	 */

	/*
	 * Decide if exact matches should be requested when performing
	 * a zap lookup on file systems supporting case-insensitive
	 * access.
	 */
	exact =
	    ((zsb->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) ||
	    ((zsb->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK));

	/*
	 * Only look in or update the DNLC if we are looking for the
	 * name on a file system that does not require normalization
	 * or case folding.  We can also look there if we happen to be
	 * on a non-normalizing, mixed sensitivity file system IF we
	 * are looking for the exact name.
	 *
	 * Maybe can add TO-UPPERed version of name to dnlc in ci-only
	 * case for performance improvement?
	 */
	update = !zsb->z_norm ||
	    ((zsb->z_case == ZFS_CASE_MIXED) &&
	    !(zsb->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));

	/*
	 * ZRENAMING indicates we are in a situation where we should
	 * take narrow locks regardless of the file system's
	 * preferences for normalizing and case folding.  This will
	 * prevent us deadlocking trying to grab the same wide lock
	 * twice if the two names happen to be case-insensitive
	 * matches.
	 */
	if (flag & ZRENAMING)
		cmpflags = 0;
	else
		cmpflags = zsb->z_norm;

	/*
	 * Wait until there are no locks on this name.
	 *
	 * Don't grab the lock if it is already held.  However, cannot
	 * have both ZSHARED and ZHAVELOCK together.
	 */
	ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK));
	if (!(flag & ZHAVELOCK))
		rw_enter(&dzp->z_name_lock, RW_READER);

	mutex_enter(&dzp->z_lock);
	for (;;) {
		if (dzp->z_unlinked) {
			mutex_exit(&dzp->z_lock);
			if (!(flag & ZHAVELOCK))
				rw_exit(&dzp->z_name_lock);
			return (SET_ERROR(ENOENT));
		}
		for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) {
			if ((u8_strcmp(name, dl->dl_name, 0, cmpflags,
			    U8_UNICODE_LATEST, &error) == 0) || error != 0)
				break;
		}
		if (error != 0) {
			mutex_exit(&dzp->z_lock);
			if (!(flag & ZHAVELOCK))
				rw_exit(&dzp->z_name_lock);
			return (SET_ERROR(ENOENT));
		}
		if (dl == NULL) {
			/*
			 * Allocate a new dirlock and add it to the list.
			 */
			dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP);
			cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL);
			dl->dl_name = name;
			dl->dl_sharecnt = 0;
			dl->dl_namelock = 0;
			dl->dl_namesize = 0;
			dl->dl_dzp = dzp;
			dl->dl_next = dzp->z_dirlocks;
			dzp->z_dirlocks = dl;
			break;
		}
		if ((flag & ZSHARED) && dl->dl_sharecnt != 0)
			break;
		cv_wait(&dl->dl_cv, &dzp->z_lock);
	}

	/*
	 * If the z_name_lock was NOT held for this dirlock record it.
	 */
	if (flag & ZHAVELOCK)
		dl->dl_namelock = 1;

	if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) {
		/*
		 * We're the second shared reference to dl.  Make a copy of
		 * dl_name in case the first thread goes away before we do.
		 * Note that we initialize the new name before storing its
		 * pointer into dl_name, because the first thread may load
		 * dl->dl_name at any time.  He'll either see the old value,
		 * which is his, or the new shared copy; either is OK.
		 */
		dl->dl_namesize = strlen(dl->dl_name) + 1;
		name = kmem_alloc(dl->dl_namesize, KM_SLEEP);
		bcopy(dl->dl_name, name, dl->dl_namesize);
		dl->dl_name = name;
	}

	mutex_exit(&dzp->z_lock);

	/*
	 * We have a dirlock on the name.  (Note that it is the dirlock,
	 * not the dzp's z_lock, that protects the name in the zap object.)
	 * See if there's an object by this name; if so, put a hold on it.
	 */
	if (flag & ZXATTR) {
		error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zsb), &zoid,
		    sizeof (zoid));
		if (error == 0)
			error = (zoid == 0 ? SET_ERROR(ENOENT) : 0);
	} else {
#ifdef HAVE_DNLC
		if (update)
			vp = dnlc_lookup(ZTOI(dzp), name);
		if (vp == DNLC_NO_VNODE) {
			iput(vp);
			error = SET_ERROR(ENOENT);
		} else if (vp) {
			if (flag & ZNEW) {
				zfs_dirent_unlock(dl);
				iput(vp);
				return (SET_ERROR(EEXIST));
			}
			*dlpp = dl;
			*zpp = VTOZ(vp);
			return (0);
		} else {
			error = zfs_match_find(zsb, dzp, name, exact,
			    update, direntflags, realpnp, &zoid);
		}
#else
		error = zfs_match_find(zsb, dzp, name, exact,
		    update, direntflags, realpnp, &zoid);
#endif /* HAVE_DNLC */
	}
	if (error) {
		if (error != ENOENT || (flag & ZEXISTS)) {
			zfs_dirent_unlock(dl);
			return (error);
		}
	} else {
		if (flag & ZNEW) {
			zfs_dirent_unlock(dl);
			return (SET_ERROR(EEXIST));
		}
		error = zfs_zget(zsb, zoid, zpp);
		if (error) {
			zfs_dirent_unlock(dl);
			return (error);
		}
#ifdef HAVE_DNLC
		if (!(flag & ZXATTR) && update)
			dnlc_update(ZTOI(dzp), name, ZTOI(*zpp));
#endif /* HAVE_DNLC */
	}

	*dlpp = dl;

	return (0);
}
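/*
 * The core of zfs_dirent_lock() is the loop that scans dzp->z_dirlocks
 * for a lock covering the same (possibly case-folded) name and cv_waits
 * until it disappears, then records its own lock on the list.  The
 * sketch below models that name-keyed wait with pthreads, one mutex and
 * condition variable standing in for z_lock and dl_cv; it illustrates
 * the technique only and is not the ZFS data structure.
 */
#include <pthread.h>
#include <string.h>
#include <stdlib.h>

struct namelock {
	const char *name;
	struct namelock *next;
};

static pthread_mutex_t dir_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t dir_cv = PTHREAD_COND_INITIALIZER;
static struct namelock *locks;

/* Block until no other thread holds `name`, then record our own lock. */
static struct namelock *
name_lock(const char *name)
{
	struct namelock *nl;

	pthread_mutex_lock(&dir_lock);
	for (;;) {
		for (nl = locks; nl != NULL; nl = nl->next)
			if (strcmp(nl->name, name) == 0)
				break;
		if (nl == NULL)
			break;			/* name is free */
		pthread_cond_wait(&dir_cv, &dir_lock);
	}
	nl = malloc(sizeof (*nl));
	nl->name = name;
	nl->next = locks;
	locks = nl;
	pthread_mutex_unlock(&dir_lock);
	return (nl);
}

/* Drop the lock and wake any waiters for this name. */
static void
name_unlock(struct namelock *nl)
{
	struct namelock **p;

	pthread_mutex_lock(&dir_lock);
	for (p = &locks; *p != NULL; p = &(*p)->next)
		if (*p == nl) {
			*p = nl->next;
			break;
		}
	pthread_cond_broadcast(&dir_cv);
	pthread_mutex_unlock(&dir_lock);
	free(nl);
}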
int
zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
{
	dmu_object_info_t doi;
	dmu_buf_t *db;
	znode_t *zp;
	int err;
	sa_handle_t *hdl;

	*zpp = NULL;

again:
	ZFS_OBJ_HOLD_ENTER(zsb, obj_num);

	err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (err);
	}

	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_SA &&
	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (SET_ERROR(EINVAL));
	}

	hdl = dmu_buf_get_user(db);
	if (hdl != NULL) {
		zp = sa_get_userdata(hdl);

		/*
		 * Since "SA" does immediate eviction we
		 * should never find a sa handle that doesn't
		 * know about the znode.
		 */
		ASSERT3P(zp, !=, NULL);

		mutex_enter(&zp->z_lock);
		ASSERT3U(zp->z_id, ==, obj_num);
		if (zp->z_unlinked) {
			err = SET_ERROR(ENOENT);
		} else {
			/*
			 * If igrab() returns NULL the VFS has independently
			 * determined the inode should be evicted and has
			 * called iput_final() to start the eviction process.
			 * The SA handle is still valid but because the VFS
			 * requires that the eviction succeed we must drop
			 * our locks and references to allow the eviction to
			 * complete.  The zfs_zget() may then be retried.
			 *
			 * This unlikely case could be optimized by registering
			 * a sops->drop_inode() callback.  The callback would
			 * need to detect the active SA hold thereby informing
			 * the VFS that this inode should not be evicted.
			 */
			if (igrab(ZTOI(zp)) == NULL) {
				mutex_exit(&zp->z_lock);
				sa_buf_rele(db, NULL);
				ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
				goto again;
			}

			*zpp = zp;
			err = 0;
		}
		mutex_exit(&zp->z_lock);
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (err);
	}
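/*
 * zfs_zget() handles a race with VFS eviction by treating a failed
 * igrab() as "try again from the top": it drops every lock and hold
 * and restarts at the again label.  Below is a compact user-space model
 * of that retry-on-failed-hold loop, using an atomic reference count
 * whose zero value plays the role of an inode already headed into
 * iput_final(); all names here are hypothetical.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct obj {
	atomic_int refs;	/* 0 means the object is being torn down */
};

/* Take a reference only if the object is still live (like igrab()). */
static bool
obj_tryhold(struct obj *o)
{
	int r = atomic_load(&o->refs);

	while (r > 0) {
		if (atomic_compare_exchange_weak(&o->refs, &r, r + 1))
			return (true);
	}
	return (false);
}

/* Caller retries the whole lookup when the hold fails. */
static struct obj *
obj_lookup_retry(struct obj *(*lookup)(void))
{
	struct obj *o;

	for (;;) {
		o = lookup();
		if (o == NULL || obj_tryhold(o))
			return (o);
		/* lost the race with teardown; look the object up again */
	}
}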
void
zfs_rmnode(znode_t *zp)
{
	zfs_sb_t *zsb = ZTOZSB(zp);
	objset_t *os = zsb->z_os;
	znode_t *xzp = NULL;
	dmu_tx_t *tx;
	uint64_t acl_obj;
	uint64_t xattr_obj;
	int error;

	ASSERT(zp->z_links == 0);
	ASSERT(atomic_read(&ZTOI(zp)->i_count) == 0);

	/*
	 * If this is an attribute directory, purge its contents.
	 */
	if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
		if (zfs_purgedir(zp) != 0) {
			/*
			 * Not enough space to delete some xattrs.
			 * Leave it in the unlinked set.
			 */
			zfs_znode_dmu_fini(zp);
			return;
		}
	}

	/*
	 * Free up all the data in the file.
	 */
	error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
	if (error) {
		/*
		 * Not enough space.  Leave the file in the unlinked set.
		 */
		zfs_znode_dmu_fini(zp);
		return;
	}

	/*
	 * If the file has extended attributes, we're going to unlink
	 * the xattr dir.
	 */
	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb),
	    &xattr_obj, sizeof (xattr_obj));
	if (error == 0 && xattr_obj) {
		error = zfs_zget(zsb, xattr_obj, &xzp);
		ASSERT(error == 0);
	}

	acl_obj = zfs_external_acl(zp);

	/*
	 * Set up the final transaction.
	 */
	tx = dmu_tx_create(os);
	dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
	dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
	if (xzp) {
		dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, TRUE, NULL);
		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
	}
	if (acl_obj)
		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);

	zfs_sa_upgrade_txholds(tx, zp);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		/*
		 * Not enough space to delete the file.  Leave it in the
		 * unlinked set, leaking it until the fs is remounted (at
		 * which point we'll call zfs_unlinked_drain() to process it).
		 */
		dmu_tx_abort(tx);
		zfs_znode_dmu_fini(zp);
		goto out;
	}

	if (xzp) {
		ASSERT(error == 0);
		mutex_enter(&xzp->z_lock);
		xzp->z_unlinked = B_TRUE;	/* mark xzp for deletion */
		xzp->z_links = 0;		/* no more links to it */
		VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zsb),
		    &xzp->z_links, sizeof (xzp->z_links), tx));
		mutex_exit(&xzp->z_lock);
		zfs_unlinked_add(xzp, tx);
	}

	/* Remove this znode from the unlinked set */
	VERIFY3U(0, ==,
	    zap_remove_int(zsb->z_os, zsb->z_unlinkedobj, zp->z_id, tx));

	zfs_znode_delete(zp, tx);

	dmu_tx_commit(tx);
out:
	if (xzp)
		zfs_iput_async(ZTOI(xzp));
}
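/*
 * zfs_rmnode() shows the standard DMU transaction shape: dmu_tx_create(),
 * declare every object the change may touch with dmu_tx_hold_*(),
 * dmu_tx_assign(), then either dmu_tx_abort() on failure (leaving the
 * znode on the unlinked set for a later pass) or do the work and
 * dmu_tx_commit().  The toy types below are a minimal sketch of that
 * create/hold/assign/commit-or-abort flow, not the real dmu_tx_t API.
 */
#include <errno.h>
#include <stdbool.h>

struct toy_tx {
	long reserved;		/* space this change is declared to need */
};

static long space_left = 1L << 20;

/* Stand-in for dmu_tx_assign(): succeed only if the reservation fits. */
static int
toy_tx_assign(struct toy_tx *tx)
{
	if (tx->reserved > space_left)
		return (ENOSPC);
	space_left -= tx->reserved;
	return (0);
}

/*
 * Delete an object, or defer it when the transaction cannot be assigned,
 * the way zfs_rmnode() leaves the znode on the unlinked set.
 */
static bool
toy_remove(long cost, void (*do_delete)(void), void (*defer)(void))
{
	struct toy_tx tx = { .reserved = cost };

	if (toy_tx_assign(&tx) != 0) {
		defer();	/* abort path: no work done, try again later */
		return (false);
	}
	do_delete();		/* work covered by the reservation */
	return (true);		/* commit: the reservation stays consumed */
}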
/*
 * This is the upward reentry point for packets arriving from the bridging
 * module and from mac_rx for links not part of a bridge.
 */
void
mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
{
	mac_impl_t *mip = (mac_impl_t *)mh;
	mac_ring_t *mr = (mac_ring_t *)mrh;
	mac_soft_ring_set_t *mac_srs;
	mblk_t *bp = mp_chain;
	boolean_t hw_classified = B_FALSE;

	/*
	 * If there are any promiscuous mode callbacks defined for
	 * this MAC, pass them a copy if appropriate.
	 */
	if (mip->mi_promisc_list != NULL)
		mac_promisc_dispatch(mip, mp_chain, NULL);

	if (mr != NULL) {
		/*
		 * If the SRS teardown has started, just return. The 'mr'
		 * continues to be valid until the driver unregisters the mac.
		 * Hardware classified packets will not make their way up
		 * beyond this point once the teardown has started. The driver
		 * is never passed a pointer to a flow entry or SRS or any
		 * structure that can be freed much before mac_unregister.
		 */
		mutex_enter(&mr->mr_lock);
		if ((mr->mr_state != MR_INUSE) || (mr->mr_flag &
		    (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) {
			mutex_exit(&mr->mr_lock);
			freemsgchain(mp_chain);
			return;
		}
		if (mr->mr_classify_type == MAC_HW_CLASSIFIER) {
			hw_classified = B_TRUE;
			MR_REFHOLD_LOCKED(mr);
		}
		mutex_exit(&mr->mr_lock);

		/*
		 * We check if an SRS is controlling this ring.
		 * If so, we can directly call the srs_lower_proc
		 * routine otherwise we need to go through mac_rx_classify
		 * to reach the right place.
		 */
		if (hw_classified) {
			mac_srs = mr->mr_srs;
			/*
			 * This is supposed to be the fast path.
			 * All packets received through here were steered by
			 * the hardware classifier, and share the same
			 * MAC header info.
			 */
			mac_srs->srs_rx.sr_lower_proc(mh,
			    (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE);
			MR_REFRELE(mr);
			return;
		}
		/* We'll fall through to software classification */
	} else {
		flow_entry_t *flent;
		int err;

		rw_enter(&mip->mi_rw_lock, RW_READER);
		if (mip->mi_single_active_client != NULL) {
			flent = mip->mi_single_active_client->mci_flent_list;
			FLOW_TRY_REFHOLD(flent, err);
			rw_exit(&mip->mi_rw_lock);
			if (err == 0) {
				(flent->fe_cb_fn)(flent->fe_cb_arg1,
				    flent->fe_cb_arg2, mp_chain, B_FALSE);
				FLOW_REFRELE(flent);
				return;
			}
		} else {
			rw_exit(&mip->mi_rw_lock);
		}
	}

	if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) {
		if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL)
			return;
	}

	freemsgchain(bp);
}
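/*
 * mac_rx_common() inspects the ring state under mr_lock, takes a
 * reference only when it will actually deliver through the ring, and
 * drops the whole chain if teardown has begun, so the ring cannot be
 * freed out from under the receive path.  The sketch below models that
 * check-under-lock-then-refhold pattern with pthreads; the types are
 * illustrative, not the mac_ring_t definitions.
 */
#include <pthread.h>
#include <stdbool.h>

enum ring_state { RING_INUSE, RING_CONDEMNED };

struct ring {
	pthread_mutex_t lock;
	enum ring_state state;
	int refs;
};

/*
 * Decide under the ring lock whether delivery may proceed; take a
 * reference only in that case so teardown cannot free the ring while
 * packets are still being pushed through it.
 */
static bool
ring_tryhold(struct ring *r)
{
	bool ok;

	pthread_mutex_lock(&r->lock);
	ok = (r->state == RING_INUSE);
	if (ok)
		r->refs++;
	pthread_mutex_unlock(&r->lock);
	return (ok);
}

static void
ring_rele(struct ring *r)
{
	pthread_mutex_lock(&r->lock);
	r->refs--;
	pthread_mutex_unlock(&r->lock);
}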
/*
 * Link zp into dl.  Can only fail if zp has been unlinked.
 */
int
zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
{
	znode_t *dzp = dl->dl_dzp;
	zfs_sb_t *zsb = ZTOZSB(zp);
	uint64_t value;
	int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
	sa_bulk_attr_t bulk[5];
	uint64_t mtime[2], ctime[2];
	int count = 0;
	int error;

	mutex_enter(&zp->z_lock);

	if (!(flag & ZRENAMING)) {
		if (zp->z_unlinked) {	/* no new links to unlinked zp */
			ASSERT(!(flag & (ZNEW | ZEXISTS)));
			mutex_exit(&zp->z_lock);
			return (SET_ERROR(ENOENT));
		}
		zp->z_links++;
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL,
		    &zp->z_links, sizeof (zp->z_links));
	}
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL,
	    &dzp->z_id, sizeof (dzp->z_id));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
	    &zp->z_pflags, sizeof (zp->z_pflags));

	if (!(flag & ZNEW)) {
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
		    ctime, sizeof (ctime));
		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
		    ctime, B_TRUE);
	}
	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
	ASSERT(error == 0);

	mutex_exit(&zp->z_lock);

	mutex_enter(&dzp->z_lock);
	dzp->z_size++;
	dzp->z_links += zp_is_dir;
	count = 0;
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL,
	    &dzp->z_size, sizeof (dzp->z_size));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL,
	    &dzp->z_links, sizeof (dzp->z_links));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL,
	    mtime, sizeof (mtime));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
	    ctime, sizeof (ctime));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
	    &dzp->z_pflags, sizeof (dzp->z_pflags));
	zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
	error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
	ASSERT(error == 0);
	mutex_exit(&dzp->z_lock);

	value = zfs_dirent(zp, zp->z_mode);
	error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name,
	    8, 1, &value, tx);
	ASSERT(error == 0);

	return (0);
}
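/*
 * zfs_link_create() batches every attribute it touches into one bulk
 * array per znode and issues a single sa_bulk_update() per lock hold,
 * rather than one write per attribute.  The sketch below shows that
 * gather-then-apply pattern in isolation; the attr_entry type, macro,
 * and apply function are illustrative, not the SA interface.
 */
#include <stddef.h>

struct attr_entry {
	const char *name;
	const void *src;
	size_t len;
};

/* Analogue of SA_ADD_BULK_ATTR(): append one attribute to the batch. */
#define ADD_BULK_ATTR(tab, cnt, n, s, l)	\
	do {					\
		(tab)[cnt].name = (n);		\
		(tab)[cnt].src = (s);		\
		(tab)[cnt].len = (l);		\
		(cnt)++;			\
	} while (0)

/* Apply the whole batch in one call, like sa_bulk_update(). */
static int
bulk_apply(struct attr_entry *tab, int cnt,
    int (*write_attr)(const char *, const void *, size_t))
{
	int i, err = 0;

	for (i = 0; i < cnt && err == 0; i++)
		err = write_attr(tab[i].name, tab[i].src, tab[i].len);
	return (err);
}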
int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_t **osp)
{
	objset_t *os;
	int i, err;

	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));

	os = kmem_zalloc(sizeof (objset_t), KM_PUSHPAGE);
	os->os_dsl_dataset = ds;
	os->os_spa = spa;
	os->os_rootbp = bp;
	if (!BP_IS_HOLE(os->os_rootbp)) {
		uint32_t aflags = ARC_WAIT;
		zbookmark_t zb;
		SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
		    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);

		if (DMU_OS_IS_L2CACHEABLE(os))
			aflags |= ARC_L2CACHE;
		if (DMU_OS_IS_L2COMPRESSIBLE(os))
			aflags |= ARC_L2COMPRESS;

		dprintf_bp(os->os_rootbp, "reading %s", "");
		err = arc_read(NULL, spa, os->os_rootbp,
		    arc_getbuf_func, &os->os_phys_buf,
		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
		if (err != 0) {
			kmem_free(os, sizeof (objset_t));
			/* convert checksum errors into IO errors */
			if (err == ECKSUM)
				err = SET_ERROR(EIO);
			return (err);
		}

		/* Increase the blocksize if we are permitted. */
		if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
		    arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
			arc_buf_t *buf = arc_buf_alloc(spa,
			    sizeof (objset_phys_t), &os->os_phys_buf,
			    ARC_BUFC_METADATA);
			bzero(buf->b_data, sizeof (objset_phys_t));
			bcopy(os->os_phys_buf->b_data, buf->b_data,
			    arc_buf_size(os->os_phys_buf));
			(void) arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf);
			os->os_phys_buf = buf;
		}

		os->os_phys = os->os_phys_buf->b_data;
		os->os_flags = os->os_phys->os_flags;
	} else {
		int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
		    sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
		os->os_phys_buf = arc_buf_alloc(spa, size,
		    &os->os_phys_buf, ARC_BUFC_METADATA);
		os->os_phys = os->os_phys_buf->b_data;
		bzero(os->os_phys, size);
	}

	/*
	 * Note: the changed_cb will be called once before the register
	 * func returns, thus changing the checksum/compression from the
	 * default (fletcher2/off).  Snapshots don't need to know about
	 * checksum/compression/copies.
	 */
	if (ds) {
		err = dsl_prop_register(ds,
		    zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
		    primary_cache_changed_cb, os);
		if (err == 0) {
			err = dsl_prop_register(ds,
			    zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
			    secondary_cache_changed_cb, os);
		}
		if (!dsl_dataset_is_snapshot(ds)) {
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_CHECKSUM),
				    checksum_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
				    compression_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_COPIES),
				    copies_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_DEDUP),
				    dedup_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_LOGBIAS),
				    logbias_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_SYNC),
				    sync_changed_cb, os);
			}
		}
		if (err != 0) {
			VERIFY(arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf));
			kmem_free(os, sizeof (objset_t));
			return (err);
		}
	} else if (ds == NULL) {
		/* It's the meta-objset. */
		os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
		os->os_compress = ZIO_COMPRESS_LZJB;
		os->os_copies = spa_max_replication(spa);
		os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
		os->os_dedup_verify = 0;
		os->os_logbias = 0;
		os->os_sync = 0;
		os->os_primary_cache = ZFS_CACHE_ALL;
		os->os_secondary_cache = ZFS_CACHE_ALL;
	}

	if (ds == NULL || !dsl_dataset_is_snapshot(ds))
		os->os_zil_header = os->os_phys->os_zil_header;
	os->os_zil = zil_alloc(os, &os->os_zil_header);

	for (i = 0; i < TXG_SIZE; i++) {
		list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
		list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
	}
	list_create(&os->os_dnodes, sizeof (dnode_t),
	    offsetof(dnode_t, dn_link));
	list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));

	mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);

	DMU_META_DNODE(os) = dnode_special_open(os,
	    &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT,
	    &os->os_meta_dnode);
	if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
		DMU_USERUSED_DNODE(os) = dnode_special_open(os,
		    &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT,
		    &os->os_userused_dnode);
		DMU_GROUPUSED_DNODE(os) = dnode_special_open(os,
		    &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT,
		    &os->os_groupused_dnode);
	}

	/*
	 * We should be the only thread trying to do this because we
	 * have ds_opening_lock
	 */
	if (ds) {
		mutex_enter(&ds->ds_lock);
		ASSERT(ds->ds_objset == NULL);
		ds->ds_objset = os;
		mutex_exit(&ds->ds_lock);
	}

	*osp = os;
	return (0);
}
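/*
 * dmu_objset_open_impl() registers half a dozen property callbacks but
 * only checks for failure once: every call is chained behind
 * if (err == 0), so the first failure short-circuits the rest and a
 * single cleanup block runs afterwards.  A compact, self-contained
 * sketch of that chain-and-check-once idiom, with hypothetical step
 * functions in place of dsl_prop_register():
 */
static int step_a(void) { return (0); }
static int step_b(void) { return (0); }
static int step_c(void) { return (0); }

static int
setup_all(void)
{
	int err;

	err = step_a();
	if (err == 0)
		err = step_b();
	if (err == 0)
		err = step_c();

	if (err != 0) {
		/* one cleanup path, regardless of which step failed */
		return (err);
	}
	return (0);
}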
/*
 * launch slave cpus into kernel text, pause them,
 * and restore the original prom pages
 */
void
i_cpr_mp_setup(void)
{
	extern void restart_other_cpu(int);
	cpu_t *cp;

	uint64_t kctx = kcontextreg;

	/*
	 * Do not allow setting page size codes in MMU primary context
	 * register while using cif wrapper. This is needed to work
	 * around OBP incorrect handling of this MMU register.
	 */
	kcontextreg = 0;

	/*
	 * reset cpu_ready_set so x_calls work properly
	 */
	CPUSET_ZERO(cpu_ready_set);
	CPUSET_ADD(cpu_ready_set, getprocessorid());

	/*
	 * setup cif to use the cookie from the new/tmp prom
	 * and setup tmp handling for calling prom services.
	 */
	i_cpr_cif_setup(CIF_SPLICE);

	/*
	 * at this point, only the nucleus and a few cpr pages are
	 * mapped in.  once we switch to the kernel trap table,
	 * we can access the rest of kernel space.
	 */
	prom_set_traptable(&trap_table);

	if (ncpus > 1) {
		sfmmu_init_tsbs();

		mutex_enter(&cpu_lock);
		/*
		 * All of the slave cpus are not ready at this time,
		 * yet the cpu structures have various cpu_flags set;
		 * clear cpu_flags and mutex_ready.
		 * Since we are coming up from a CPU suspend, the slave cpus
		 * are frozen.
		 */
		for (cp = CPU->cpu_next; cp != CPU; cp = cp->cpu_next) {
			cp->cpu_flags = CPU_FROZEN;
			cp->cpu_m.mutex_ready = 0;
		}

		for (cp = CPU->cpu_next; cp != CPU; cp = cp->cpu_next)
			restart_other_cpu(cp->cpu_id);

		pause_cpus(NULL, NULL);
		mutex_exit(&cpu_lock);

		i_cpr_xcall(i_cpr_clear_entries);
	} else
		i_cpr_clear_entries(0, 0);

	/*
	 * now unlink the cif wrapper;  WARNING: do not call any
	 * prom_xxx() routines until after prom pages are restored.
	 */
	i_cpr_cif_setup(CIF_UNLINK);

	(void) i_cpr_prom_pages(CPR_PROM_RESTORE);

	/* allow setting page size codes in MMU primary context register */
	kcontextreg = kctx;
}