/*
 * srpt_ch_post_send
 */
ibt_status_t
srpt_ch_post_send(srpt_channel_t *ch, srpt_iu_t *iu, uint32_t len,
    uint_t fence)
{
	ibt_status_t		status;
	ibt_send_wr_t		wr;
	ibt_wr_ds_t		ds;
	uint_t			posted;

	ASSERT(ch != NULL);
	ASSERT(iu != NULL);
	ASSERT(mutex_owned(&iu->iu_lock));

	rw_enter(&ch->ch_rwlock, RW_READER);
	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		rw_exit(&ch->ch_rwlock);
		SRPT_DPRINTF_L2("ch_post_send, bad ch state (%d)",
		    ch->ch_state);
		return (IBT_FAILURE);
	}
	rw_exit(&ch->ch_rwlock);

	wr.wr_id = srpt_ch_alloc_swqe_wrid(ch, SRPT_SWQE_TYPE_RESP,
	    (void *)iu);
	if (wr.wr_id == 0) {
		SRPT_DPRINTF_L2("ch_post_send, queue full");
		return (IBT_FAILURE);
	}
	atomic_inc_32(&iu->iu_sq_posted_cnt);

	wr.wr_flags = IBT_WR_SEND_SIGNAL;
	if (fence == SRPT_FENCE_SEND) {
		wr.wr_flags |= IBT_WR_SEND_FENCE;
	}
	wr.wr_opcode = IBT_WRC_SEND;
	wr.wr_trans  = IBT_RC_SRV;
	wr.wr_nds = 1;
	wr.wr_sgl = &ds;

	ds.ds_va = iu->iu_sge.ds_va;
	ds.ds_key = iu->iu_sge.ds_key;
	ds.ds_len = len;

	SRPT_DPRINTF_L4("ch_post_send, posting SRP response to channel"
	    " ds.ds_va (0x%16llx), ds.ds_key (0x%08x), "
	    " ds.ds_len (%d)",
	    (u_longlong_t)ds.ds_va, ds.ds_key, ds.ds_len);

	status = ibt_post_send(ch->ch_chan_hdl, &wr, 1, &posted);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_post_send, post_send failed (%d)",
		    status);
		atomic_dec_32(&iu->iu_sq_posted_cnt);
		srpt_ch_free_swqe_wrid(ch, wr.wr_id);
		return (status);
	}

	return (IBT_SUCCESS);
}
static void
profile_create(hrtime_t interval, const char *name, int kind)
{
	profile_probe_t *prof;
	int nr_frames = PROF_ARTIFICIAL_FRAMES + dtrace_mach_aframes();

	if (profile_aframes)
		nr_frames = profile_aframes;

	if (interval < profile_interval_min)
		return;

	if (dtrace_probe_lookup(profile_id, NULL, NULL, name) != 0)
		return;

	atomic_inc_32(&profile_total);
	if (profile_total > profile_max) {
		atomic_dec_32(&profile_total);
		return;
	}

	prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP);
	(void) strcpy(prof->prof_name, name);
	prof->prof_interval = interval;
	prof->prof_cyclic = CYCLIC_NONE;
	prof->prof_kind = kind;
	prof->prof_id = dtrace_probe_create(profile_id,
	    NULL, NULL, name, nr_frames, prof);
}
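/*
 * Aside: profile_create() above bounds profile_total without taking a
 * lock by incrementing first and backing off if the limit was
 * overshot.  A minimal stand-alone sketch of that pattern, using
 * hypothetical names (res_total, res_max) rather than anything from
 * the sources in this collection:
 */
static volatile uint32_t res_total;	/* hypothetical live count */
static const uint32_t res_max = 128;	/* hypothetical limit */

static boolean_t
res_try_hold(void)
{
	atomic_inc_32(&res_total);
	if (res_total > res_max) {
		/* Overshot the limit; back the increment out and fail. */
		atomic_dec_32(&res_total);
		return (B_FALSE);
	}
	return (B_TRUE);
}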
/*
 * dm_detach:
 *
 * Autoconfiguration detach function for pseudo-device glue.
 * This routine is called by dm_ioctl::dm_dev_remove_ioctl and by autoconf to
 * remove devices created in device-mapper.
 */
static int
dm_detach(device_t self, int flags)
{
	dm_dev_t *dmv;

	/* Detach device from global device list */
	if ((dmv = dm_dev_detach(self)) == NULL)
		return ENOENT;

	/* Destroy active table first. */
	dm_table_destroy(&dmv->table_head, DM_TABLE_ACTIVE);

	/* Destroy inactive table, if it exists, too. */
	dm_table_destroy(&dmv->table_head, DM_TABLE_INACTIVE);

	dm_table_head_destroy(&dmv->table_head);

	/* Destroy disk device structure */
	disk_detach(dmv->diskp);
	disk_destroy(dmv->diskp);

	/* Destroy device */
	(void)dm_dev_free(dmv);

	/* Decrement device counter after removing device */
	atomic_dec_32(&dm_dev_counter);

	return 0;
}
/*
 * Deallocate a tree: release all resources associated with a tree and
 * remove the tree from the user's tree list.
 *
 * The tree being destroyed must be in the "destroying" state and the
 * reference count must be zero.  This function assumes it's single
 * threaded, i.e. only one thread will attempt to destroy a specific
 * tree, which should be the case if the tree is disconnected and has a
 * reference count of zero.
 */
static void
smb_tree_dealloc(smb_tree_t *tree)
{
	ASSERT(tree);
	ASSERT(tree->t_magic == SMB_TREE_MAGIC);
	ASSERT(tree->t_state == SMB_TREE_STATE_DISCONNECTED);
	ASSERT(tree->t_refcnt == 0);

	/*
	 * Remove the tree from the user's tree list.  This must be done
	 * before any resources associated with the tree are released.
	 */
	smb_llist_enter(&tree->t_user->u_tree_list, RW_WRITER);
	smb_llist_remove(&tree->t_user->u_tree_list, tree);
	smb_llist_exit(&tree->t_user->u_tree_list);

	tree->t_magic = (uint32_t)~SMB_TREE_MAGIC;
	smb_idpool_free(&tree->t_user->u_tid_pool, tree->t_tid);
	atomic_dec_32(&tree->t_session->s_tree_cnt);

	if (tree->t_snode)
		smb_node_release(tree->t_snode);

	mutex_destroy(&tree->t_mutex);

	/*
	 * The list of open files and open directories should be empty.
	 */
	smb_llist_destructor(&tree->t_ofile_list);
	smb_llist_destructor(&tree->t_odir_list);
	smb_idpool_destructor(&tree->t_fid_pool);
	smb_idpool_destructor(&tree->t_odid_pool);
	kmem_cache_free(tree->t_server->si_cache_tree, tree);
}
/*
 * Delete an odir.
 *
 * Remove the odir from the tree list before freeing resources
 * associated with the odir.
 */
void
smb_odir_delete(void *arg)
{
	smb_tree_t	*tree;
	smb_odir_t	*od = (smb_odir_t *)arg;

	SMB_ODIR_VALID(od);
	ASSERT(od->d_refcnt == 0);
	ASSERT(od->d_state == SMB_ODIR_STATE_CLOSED);

	tree = od->d_tree;
	smb_llist_enter(&tree->t_odir_list, RW_WRITER);
	smb_llist_remove(&tree->t_odir_list, od);
	smb_idpool_free(&tree->t_odid_pool, od->d_odid);
	atomic_dec_32(&tree->t_session->s_dir_cnt);
	smb_llist_exit(&tree->t_odir_list);

	mutex_enter(&od->d_mutex);
	mutex_exit(&od->d_mutex);

	od->d_magic = 0;
	smb_node_release(od->d_dnode);
	smb_user_release(od->d_user);
	mutex_destroy(&od->d_mutex);
	kmem_cache_free(smb_cache_odir, od);
}
void
oce_delete_nw_interface(struct oce_dev *dev)
{
	/* currently only a single interface is implemented */
	if (dev->nifs > 0) {
		(void) oce_if_del(dev, dev->if_id);
		atomic_dec_32(&dev->nifs);
	}
}
int
ddi_intr_free(ddi_intr_handle_t h)
{
	ddi_intr_handle_impl_t	*hdlp = (ddi_intr_handle_impl_t *)h;
	int			ret;

	DDI_INTR_APIDBG((CE_CONT, "ddi_intr_free: hdlp = %p\n",
	    (void *)hdlp));

	if (hdlp == NULL)
		return (DDI_EINVAL);

	rw_enter(&hdlp->ih_rwlock, RW_WRITER);
	if (((hdlp->ih_flags & DDI_INTR_MSIX_DUP) &&
	    (hdlp->ih_state != DDI_IHDL_STATE_ADDED)) ||
	    ((hdlp->ih_state != DDI_IHDL_STATE_ALLOC) &&
	    (!(hdlp->ih_flags & DDI_INTR_MSIX_DUP)))) {
		rw_exit(&hdlp->ih_rwlock);
		return (DDI_EINVAL);
	}

	/* Set the number of interrupts to free */
	hdlp->ih_scratch1 = 1;

	ret = i_ddi_intr_ops(hdlp->ih_dip, hdlp->ih_dip,
	    DDI_INTROP_FREE, hdlp, NULL);

	rw_exit(&hdlp->ih_rwlock);
	if (ret == DDI_SUCCESS) {
		/* This would be the dup vector */
		if (hdlp->ih_flags & DDI_INTR_MSIX_DUP)
			atomic_dec_32(&hdlp->ih_main->ih_dup_cnt);
		else {
			int	n, curr_type;

			n = i_ddi_intr_get_current_nintrs(hdlp->ih_dip) - 1;
			curr_type = i_ddi_intr_get_current_type(hdlp->ih_dip);

			i_ddi_intr_set_current_nintrs(hdlp->ih_dip, n);

			if ((i_ddi_irm_supported(hdlp->ih_dip, curr_type)
			    != DDI_SUCCESS) && (n > 0))
				(void) i_ddi_irm_modify(hdlp->ih_dip, n);

			if (hdlp->ih_type & DDI_INTR_TYPE_FIXED)
				i_ddi_set_intr_handle(hdlp->ih_dip,
				    hdlp->ih_inum, NULL);

			i_ddi_intr_devi_fini(hdlp->ih_dip);
			i_ddi_free_intr_phdl(hdlp);
		}
		rw_destroy(&hdlp->ih_rwlock);
		kmem_free(hdlp, sizeof (ddi_intr_handle_impl_t));
	}

	return (ret);
}
/*ARGSUSED*/
static void
profile_destroy(void *arg, dtrace_id_t id, void *parg)
{
	profile_probe_t *prof = parg;

	ASSERT(prof->prof_cyclic == CYCLIC_NONE);
	kmem_free(prof, sizeof (profile_probe_t));

	ASSERT(profile_total >= 1);
	atomic_dec_32(&profile_total);
}
/*
 * srpt_ch_rsp_comp()
 *
 * Process a completion for an IB SEND message.  A SEND completion
 * is for a SRP response packet sent back to the initiator.  It
 * will not have a STMF SCSI task associated with it if it was
 * sent for a rejected IU, or was a task management abort response.
 */
static void
srpt_ch_rsp_comp(srpt_channel_t *ch, srpt_iu_t *iu,
	ibt_wc_status_t wc_status)
{
	stmf_status_t	st = STMF_SUCCESS;

	ASSERT(iu->iu_ch == ch);

	/*
	 * Process the completion regardless of whether it's a failure or
	 * success.  At this point, we've processed as far as we can and
	 * just need to complete the associated task.
	 */
	if (wc_status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_rsp_comp, WC status err(%d)", wc_status);
		st = STMF_FAILURE;

		if (wc_status != IBT_WC_WR_FLUSHED_ERR) {
			srpt_ch_disconnect(ch);
		}
	}

	/*
	 * If the IU response completion is not associated with
	 * a SCSI task, release the IU to return the resource
	 * and the reference to the channel it holds.
	 */
	mutex_enter(&iu->iu_lock);
	atomic_dec_32(&iu->iu_sq_posted_cnt);

	if (iu->iu_stmf_task == NULL) {
		srpt_ioc_repost_recv_iu(iu->iu_ioc, iu);
		mutex_exit(&iu->iu_lock);
		srpt_ch_release_ref(ch, 0);
		return;
	}

	/*
	 * We should not get a SEND completion where the task has already
	 * completed aborting and STMF has been informed.
	 */
	ASSERT((iu->iu_flags & SRPT_IU_ABORTED) == 0);

	/*
	 * Let STMF know we are done.
	 */
	mutex_exit(&iu->iu_lock);

	stmf_send_status_done(iu->iu_stmf_task, st, STMF_IOF_LPORT_DONE);
}
void
rw_exit(krwlock_t *rwlp)
{
	if (rwlp->rw_owner == current_thread()) {
		rwlp->rw_owner = NULL;
		ASSERT(rwlp->rw_readers == 0);
		lck_rw_unlock_exclusive((lck_rw_t *)&rwlp->rw_lock[0]);
	} else {
		atomic_dec_32((volatile uint32_t *)&rwlp->rw_readers);
		ASSERT(rwlp->rw_owner == 0);
		lck_rw_unlock_shared((lck_rw_t *)&rwlp->rw_lock[0]);
	}
}
/*
 * Free rxbuf.
 */
static void
vmxnet3s_free_rxbuf(vmxnet3s_softc_t *dp, vmxnet3s_rxbuf_t *rxbuf)
{
	vmxnet3s_free(&rxbuf->dma);
	kmem_free(rxbuf, sizeof (vmxnet3s_rxbuf_t));

#ifndef DEBUG
	atomic_dec_32(&dp->rxnumbufs);
#else
	{
		uint32_t nv = atomic_dec_32_nv(&dp->rxnumbufs);
		ASSERT(nv != (uint32_t)-1);
	}
#endif
}
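/*
 * Aside: the DEBUG block in vmxnet3s_free_rxbuf() relies on
 * atomic_dec_32_nv() returning the new value, so a decrement of an
 * already-zero counter (which wraps to UINT32_MAX) trips the ASSERT in
 * DEBUG builds, while non-DEBUG builds pay only for a plain
 * atomic_dec_32().  A sketch of that idiom as a reusable macro
 * (COUNTER_DEC is hypothetical, not from the sources above):
 */
#ifdef DEBUG
#define	COUNTER_DEC(cp) {						\
	uint32_t nv = atomic_dec_32_nv(cp);				\
	ASSERT(nv != (uint32_t)-1);	/* catch underflow */		\
}
#else
#define	COUNTER_DEC(cp)	atomic_dec_32(cp)
#endif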
/*
 * Delete an ofile.
 *
 * Remove the ofile from the tree list before freeing resources
 * associated with the ofile.
 */
void
smb_ofile_delete(void *arg)
{
	smb_tree_t	*tree;
	smb_ofile_t	*of = (smb_ofile_t *)arg;

	SMB_OFILE_VALID(of);
	ASSERT(of->f_refcnt == 0);
	ASSERT(of->f_state == SMB_OFILE_STATE_CLOSED);
	ASSERT(!SMB_OFILE_OPLOCK_GRANTED(of));

	tree = of->f_tree;
	smb_llist_enter(&tree->t_ofile_list, RW_WRITER);
	smb_llist_remove(&tree->t_ofile_list, of);
	smb_idpool_free(&tree->t_fid_pool, of->f_fid);
	atomic_dec_32(&tree->t_session->s_file_cnt);
	smb_llist_exit(&tree->t_ofile_list);

	mutex_enter(&of->f_mutex);
	mutex_exit(&of->f_mutex);

	switch (of->f_ftype) {
	case SMB_FTYPE_BYTE_PIPE:
	case SMB_FTYPE_MESG_PIPE:
		smb_opipe_dealloc(of->f_pipe);
		of->f_pipe = NULL;
		break;
	case SMB_FTYPE_DISK:
		if (of->f_odir != NULL)
			smb_odir_release(of->f_odir);
		smb_node_rem_ofile(of->f_node, of);
		smb_node_release(of->f_node);
		break;
	default:
		ASSERT(!"f_ftype");
		break;
	}

	of->f_magic = (uint32_t)~SMB_OFILE_MAGIC;
	mutex_destroy(&of->f_mutex);
	crfree(of->f_cr);
	smb_user_release(of->f_user);
	kmem_cache_free(smb_cache_ofile, of);
}
/*
 * Disconnect a tree.
 */
void
smb_tree_disconnect(smb_tree_t *tree, boolean_t do_exec)
{
	smb_execsub_info_t subs;

	ASSERT(tree->t_magic == SMB_TREE_MAGIC);

	mutex_enter(&tree->t_mutex);
	ASSERT(tree->t_refcnt);

	if (smb_tree_is_connected_locked(tree)) {
		/*
		 * Indicate that the disconnect process has started.
		 */
		tree->t_state = SMB_TREE_STATE_DISCONNECTING;
		mutex_exit(&tree->t_mutex);

		atomic_dec_32(&tree->t_server->sv_open_trees);

		if (do_exec) {
			/*
			 * The files opened under this tree are closed.
			 */
			smb_ofile_close_all(tree);
			/*
			 * The directories opened under this tree are closed.
			 */
			smb_tree_close_odirs(tree, 0);
		}

		mutex_enter(&tree->t_mutex);
		tree->t_state = SMB_TREE_STATE_DISCONNECTED;
	}

	mutex_exit(&tree->t_mutex);

	if (do_exec && tree->t_state == SMB_TREE_STATE_DISCONNECTED &&
	    tree->t_shr_flags & SMB_SHRF_UNMAP) {

		(void) smb_tree_set_execsub_info(tree, &subs);

		(void) smb_kshare_exec(tree->t_server->sv_lmshrd,
		    (char *)tree->t_sharename, &subs, SMB_SHR_UNMAP);
	}
}
void
smbfs_freevfs(vfs_t *vfsp)
{
	smbmntinfo_t	*smi;

	/* free up the resources */
	smi = VFTOSMI(vfsp);

	/*
	 * By this time we should have already deleted the
	 * smi kstats in the unmount code.  If they are still around,
	 * something is wrong.
	 */
	ASSERT(smi->smi_io_kstats == NULL);

	smbfs_zonelist_remove(smi);

	smbfs_free_smi(smi);

	/*
	 * Allow _fini() to succeed now, if so desired.
	 */
	atomic_dec_32(&smbfs_mountcount);
}
/*
 * e1000g_rxfree_func - the call-back function to reclaim an rx buffer
 *
 * This function is called when an mp is freed by the user through a
 * freeb call (only for mps constructed through a desballoc call).
 * It returns the freed buffer to the freelist.
 */
void
e1000g_rxfree_func(p_rx_sw_packet_t packet)
{
	e1000g_rx_data_t *rx_data;
	private_devi_list_t *devi_node;
	struct e1000g *Adapter;
	uint32_t ring_cnt;
	uint32_t ref_cnt;
	unsigned char *address;

	if (packet->ref_cnt == 0) {
		/*
		 * This case only happens when rx buffers are being freed
		 * in e1000g_stop() and freemsg() is called.
		 */
		return;
	}

	rx_data = (e1000g_rx_data_t *)(uintptr_t)packet->rx_data;

	if (packet->mp == NULL) {
		/*
		 * Allocate a mblk that binds to the data buffer
		 */
		address = (unsigned char *)packet->rx_buf->address;
		if (address != NULL) {
			packet->mp = desballoc((unsigned char *)
			    address, packet->rx_buf->size,
			    BPRI_MED, &packet->free_rtn);
		}
	}

	/*
	 * Enqueue the recycled packets in a recycle queue.  When freelist
	 * dries up, move the entire chain of packets from recycle queue
	 * to freelist.  This helps in avoiding per packet mutex contention
	 * around freelist.
	 */
	mutex_enter(&rx_data->recycle_lock);
	QUEUE_PUSH_TAIL(&rx_data->recycle_list, &packet->Link);
	rx_data->recycle_freepkt++;
	mutex_exit(&rx_data->recycle_lock);

	ref_cnt = atomic_dec_32_nv(&packet->ref_cnt);
	if (ref_cnt == 0) {
		mutex_enter(&e1000g_rx_detach_lock);
		e1000g_free_rx_sw_packet(packet, B_FALSE);

		atomic_dec_32(&rx_data->pending_count);
		atomic_dec_32(&e1000g_mblks_pending);

		if ((rx_data->pending_count == 0) &&
		    (rx_data->flag & E1000G_RX_STOPPED)) {
			devi_node = rx_data->priv_devi_node;

			if (devi_node != NULL) {
				ring_cnt = atomic_dec_32_nv(
				    &devi_node->pending_rx_count);
				if ((ring_cnt == 0) &&
				    (devi_node->flag &
				    E1000G_PRIV_DEVI_DETACH)) {
					e1000g_free_priv_devi_node(
					    devi_node);
				}
			} else {
				Adapter = rx_data->rx_ring->adapter;
				atomic_dec_32(
				    &Adapter->pending_rx_count);
			}

			e1000g_free_rx_pending_buffers(rx_data);
			e1000g_free_rx_data(rx_data);
		}
		mutex_exit(&e1000g_rx_detach_lock);
	}
}
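/*
 * Aside: the teardown in e1000g_rxfree_func() hinges on
 * atomic_dec_32_nv() returning the *new* value, so exactly one caller
 * observes zero and runs the destructor.  A minimal sketch of that
 * release pattern, assuming a hypothetical obj_t with an embedded
 * reference count (none of these names come from the sources above):
 */
typedef struct obj {
	volatile uint32_t	obj_refcnt;	/* active references */
	/* ... payload ... */
} obj_t;

static void
obj_rele(obj_t *op)
{
	/* Only the thread that drops the last reference frees. */
	if (atomic_dec_32_nv(&op->obj_refcnt) == 0)
		kmem_free(op, sizeof (*op));
}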
/*
 * smb_ofile_close
 */
void
smb_ofile_close(smb_ofile_t *of, int32_t mtime_sec)
{
	timestruc_t now;
	uint32_t flags = 0;

	SMB_OFILE_VALID(of);

	mutex_enter(&of->f_mutex);
	ASSERT(of->f_refcnt);
	switch (of->f_state) {
	case SMB_OFILE_STATE_OPEN: {

		of->f_state = SMB_OFILE_STATE_CLOSING;
		mutex_exit(&of->f_mutex);

		if (of->f_ftype == SMB_FTYPE_MESG_PIPE) {
			smb_opipe_close(of);
			smb_server_dec_pipes(of->f_server);
		} else {
			smb_attr_t *pa = &of->f_pending_attr;

			/*
			 * In here we make changes to of->f_pending_attr
			 * while not holding of->f_mutex.  This is OK
			 * because we've changed f_state to CLOSING,
			 * so no more threads will take this path.
			 */
			if (mtime_sec != 0) {
				pa->sa_vattr.va_mtime.tv_sec = mtime_sec;
				pa->sa_mask |= SMB_AT_MTIME;
			}

			/*
			 * If we have ever modified data via this handle
			 * (write or truncate) and if the mtime was not
			 * set via this handle, update the mtime again
			 * during the close.  Windows expects this.
			 * [ MS-FSA 2.1.5.4 "Update Timestamps" ]
			 */
			if (of->f_written &&
			    (pa->sa_mask & SMB_AT_MTIME) == 0) {
				pa->sa_mask |= SMB_AT_MTIME;
				gethrestime(&now);
				pa->sa_vattr.va_mtime = now;
			}

			if (of->f_flags & SMB_OFLAGS_SET_DELETE_ON_CLOSE) {
				if (smb_tree_has_feature(of->f_tree,
				    SMB_TREE_CATIA)) {
					flags |= SMB_CATIA;
				}
				(void) smb_node_set_delete_on_close(
				    of->f_node, of->f_cr, flags);
			}
			smb_fsop_unshrlock(of->f_cr, of->f_node,
			    of->f_uniqid);
			smb_node_destroy_lock_by_ofile(of->f_node, of);

			if (smb_node_is_file(of->f_node)) {
				(void) smb_fsop_close(of->f_node,
				    of->f_mode, of->f_cr);
				smb_oplock_release(of->f_node, of);
			}
			if (smb_node_dec_open_ofiles(of->f_node) == 0) {
				/*
				 * Last close.  The f_pending_attr has
				 * only times (atime,ctime,mtime) so
				 * we can borrow it to commit the
				 * n_pending_dosattr from the node.
				 */
				pa->sa_dosattr =
				    of->f_node->n_pending_dosattr;
				if (pa->sa_dosattr != 0)
					pa->sa_mask |= SMB_AT_DOSATTR;
				/* Let's leave this zero when not in use. */
				of->f_node->n_allocsz = 0;
			}
			if (pa->sa_mask != 0) {
				/*
				 * Commit any pending attributes from
				 * the ofile we're closing.  Note that
				 * we pass NULL as the ofile to setattr
				 * so it will write to the file system
				 * and not keep anything on the ofile.
				 * This clears n_pending_dosattr if
				 * there are no opens, otherwise the
				 * dosattr will be pending again.
				 */
				(void) smb_node_setattr(NULL, of->f_node,
				    of->f_cr, NULL, pa);
			}

			/*
			 * Cancel any notify change requests that
			 * may be using this open instance.
			 */
			if (of->f_node->n_fcn.fcn_count)
				smb_notify_file_closed(of);

			smb_server_dec_files(of->f_server);
		}
		atomic_dec_32(&of->f_tree->t_open_files);

		mutex_enter(&of->f_mutex);
		ASSERT(of->f_refcnt);
		ASSERT(of->f_state == SMB_OFILE_STATE_CLOSING);
		of->f_state = SMB_OFILE_STATE_CLOSED;
		break;
	}
	case SMB_OFILE_STATE_CLOSED:
	case SMB_OFILE_STATE_CLOSING:
		break;

	default:
		ASSERT(0);
		break;
	}
	mutex_exit(&of->f_mutex);
}
/*
 * Decrement a CPU's work count
 */
static void
xc_decrement(struct machcpu *mcpu)
{
	atomic_dec_32(&mcpu->xc_work_cnt);
}
/*
 * srpt_ch_rsp_comp()
 *
 * Process a completion for an IB SEND message.  A SEND completion
 * is for a SRP response packet sent back to the initiator.  It
 * will not have a STMF SCSI task associated with it if it was
 * sent for a rejected IU, or was a task management abort response.
 */
static void
srpt_ch_rsp_comp(srpt_channel_t *ch, srpt_iu_t *iu,
	ibt_wc_status_t wc_status)
{
	ASSERT(iu->iu_ch == ch);

	/*
	 * If the work completion indicates failure, decrement the
	 * send posted count.  If it is a flush error, we are
	 * done; for all other errors start a channel disconnect.
	 */
	if (wc_status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_rsp_comp, WC status err(%d)", wc_status);
		atomic_dec_32(&iu->iu_sq_posted_cnt);

		if (wc_status != IBT_WC_WR_FLUSHED_ERR) {
			srpt_ch_disconnect(ch);
		}

		mutex_enter(&iu->iu_lock);
		if (iu->iu_stmf_task == NULL) {
			srpt_ioc_repost_recv_iu(iu->iu_ioc, iu);
			mutex_exit(&iu->iu_lock);
			srpt_ch_release_ref(ch, 0);
		} else {
			/* cleanup handled in task_free */
			mutex_exit(&iu->iu_lock);
		}
		return;
	}

	/*
	 * If the IU response completion is not associated with
	 * a SCSI task, release the IU to return the resource
	 * and the reference to the channel it holds.
	 */
	mutex_enter(&iu->iu_lock);
	atomic_dec_32(&iu->iu_sq_posted_cnt);

	if (iu->iu_stmf_task == NULL) {
		srpt_ioc_repost_recv_iu(iu->iu_ioc, iu);
		mutex_exit(&iu->iu_lock);
		srpt_ch_release_ref(ch, 0);
		return;
	}

	/*
	 * If STMF has requested the IU task be aborted, then notify STMF
	 * the command is now aborted.
	 */
	if ((iu->iu_flags & SRPT_IU_STMF_ABORTING) != 0) {
		scsi_task_t	*abort_task = iu->iu_stmf_task;

		mutex_exit(&iu->iu_lock);
		stmf_abort(STMF_REQUEUE_TASK_ABORT_LPORT, abort_task,
		    STMF_ABORTED, NULL);
		return;
	}

	/*
	 * We should not get a SEND completion where the task has already
	 * completed aborting and STMF has been informed.
	 */
	ASSERT((iu->iu_flags & SRPT_IU_ABORTED) == 0);

	/*
	 * Successful status response completion for SCSI task.
	 * Let STMF know we are done.
	 */
	mutex_exit(&iu->iu_lock);

	stmf_send_status_done(iu->iu_stmf_task, STMF_SUCCESS,
	    STMF_IOF_LPORT_DONE);
}
/*
 * Remove a lnode from the table
 */
void
freelonode(lnode_t *lp)
{
	lnode_t *lt;
	lnode_t *ltprev = NULL;
	struct lfsnode *lfs, *nextlfs;
	struct vfs *vfsp;
	struct vnode *vp = ltov(lp);
	struct vnode *realvp = realvp(vp);
	struct loinfo *li = vtoli(vp->v_vfsp);

#ifdef LODEBUG
	lo_dprint(4, "freelonode lp %p hash %d\n",
	    lp, ltablehash(lp->lo_vp, li));
#endif
	TABLE_LOCK_ENTER(lp->lo_vp, li);

	mutex_enter(&vp->v_lock);
	if (vp->v_count > 1) {
		vp->v_count--;	/* release our hold from vn_rele */
		mutex_exit(&vp->v_lock);
		TABLE_LOCK_EXIT(lp->lo_vp, li);
		return;
	}
	mutex_exit(&vp->v_lock);

	for (lt = TABLE_BUCKET(lp->lo_vp, li); lt != NULL;
	    ltprev = lt, lt = lt->lo_next) {
		if (lt == lp) {
#ifdef LODEBUG
			lo_dprint(4, "freeing %p, vfsp %p\n",
			    vp, vp->v_vfsp);
#endif
			atomic_dec_32(&li->li_refct);
			vfsp = vp->v_vfsp;
			vn_invalid(vp);
			if (vfsp != li->li_mountvfs) {
				mutex_enter(&li->li_lfslock);
				/*
				 * Check for unused lfs
				 */
				lfs = li->li_lfs;
				while (lfs != NULL) {
					nextlfs = lfs->lfs_next;
					if (vfsp == &lfs->lfs_vfs) {
						lfs_rele(lfs, li);
						break;
					}
					if (lfs->lfs_vfs.vfs_count == 1) {
						/*
						 * Lfs is idle
						 */
						freelfsnode(lfs, li);
					}
					lfs = nextlfs;
				}
				mutex_exit(&li->li_lfslock);
			}
			if (ltprev == NULL) {
				TABLE_BUCKET(lt->lo_vp, li) = lt->lo_next;
			} else {
				ltprev->lo_next = lt->lo_next;
			}
			TABLE_COUNT(lt->lo_vp, li)--;
			TABLE_LOCK_EXIT(lt->lo_vp, li);
			kmem_cache_free(lnode_cache, lt);
			vn_free(vp);
			VN_RELE(realvp);
			return;
		}
	}
	panic("freelonode");
	/*NOTREACHED*/
}
template<typename T>
static void
decrease(T *ptr)
{
	atomic_dec_32(ptr);
}
/*
 * Release a process-private rwlock and wake up any thread(s) sleeping on it.
 * This is called when a thread releases a lock that appears to have waiters.
 */
static void
rw_queue_release(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	queue_head_t *qp;
	uint32_t readers;
	uint32_t writer;
	ulwp_t **ulwpp;
	ulwp_t *ulwp;
	ulwp_t *prev;
	int nlwpid = 0;
	int more;
	int maxlwps = MAXLWPS;
	lwpid_t buffer[MAXLWPS];
	lwpid_t *lwpid = buffer;

	qp = queue_lock(rwlp, MX);

	/*
	 * Here is where we actually drop the lock,
	 * but we retain the URW_HAS_WAITERS flag, if it is already set.
	 */
	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	if (readers & URW_WRITE_LOCKED)	/* drop the writer lock */
		atomic_and_32(rwstate, ~URW_WRITE_LOCKED);
	else				/* drop the readers lock */
		atomic_dec_32(rwstate);
	if (!(readers & URW_HAS_WAITERS)) {	/* no waiters */
		queue_unlock(qp);
		return;
	}

	/*
	 * The presence of the URW_HAS_WAITERS flag causes all rwlock
	 * code to go through the slow path, acquiring queue_lock(qp).
	 * Therefore, the rest of this code is safe because we are
	 * holding the queue lock and the URW_HAS_WAITERS flag is set.
	 */
	readers = *rwstate;		/* must fetch the value again */
	ASSERT_CONSISTENT_STATE(readers);
	ASSERT(readers & URW_HAS_WAITERS);
	readers &= URW_READERS_MASK;	/* count of current readers */
	writer = 0;			/* no current writer */

	/*
	 * Examine the queue of waiters in priority order and prepare
	 * to wake up as many readers as we encounter before encountering
	 * a writer.  If the highest priority thread on the queue is a
	 * writer, stop there and wake it up.
	 *
	 * We keep track of lwpids that are to be unparked in lwpid[].
	 * __lwp_unpark_all() is called to unpark all of them after
	 * they have been removed from the sleep queue and the sleep
	 * queue lock has been dropped.  If we run out of space in our
	 * on-stack buffer, we need to allocate more but we can't call
	 * lmalloc() because we are holding a queue lock when the overflow
	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
	 * either because the application may have allocated a small
	 * stack and we don't want to overrun the stack.  So we call
	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
	 * system call directly since that path acquires no locks.
	 */
	while ((ulwpp = queue_slot(qp, &prev, &more)) != NULL) {
		ulwp = *ulwpp;
		ASSERT(ulwp->ul_wchan == rwlp);
		if (ulwp->ul_writer) {
			if (writer != 0 || readers != 0)
				break;
			/* one writer to wake */
			writer++;
		} else {
			if (writer != 0)
				break;
			/* at least one reader to wake */
			readers++;
			if (nlwpid == maxlwps)
				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
		}
		queue_unlink(qp, ulwpp, prev);
		ulwp->ul_sleepq = NULL;
		ulwp->ul_wchan = NULL;
		if (writer) {
			/*
			 * Hand off the lock to the writer we will be waking.
			 */
			ASSERT((*rwstate & ~URW_HAS_WAITERS) == 0);
			atomic_or_32(rwstate, URW_WRITE_LOCKED);
			rwlp->rwlock_owner = (uintptr_t)ulwp;
		}
		lwpid[nlwpid++] = ulwp->ul_lwpid;
	}

	/*
	 * This modification of rwstate must be done last.
	 * The presence of the URW_HAS_WAITERS flag causes all rwlock
	 * code to go through the slow path, acquiring queue_lock(qp).
	 * Otherwise the read_lock_try() and write_lock_try() fast paths
	 * are effective.
	 */
	if (ulwpp == NULL)
		atomic_and_32(rwstate, ~URW_HAS_WAITERS);

	if (nlwpid == 0) {
		queue_unlock(qp);
	} else {
		ulwp_t *self = curthread;
		no_preempt(self);
		queue_unlock(qp);
		if (nlwpid == 1)
			(void) __lwp_unpark(lwpid[0]);
		else
			(void) __lwp_unpark_all(lwpid, nlwpid);
		preempt(self);
	}
	if (lwpid != buffer)
		(void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t));
}
/*
 * srpt_ch_data_comp()
 *
 * Process an IB completion for a RDMA operation.  This completion
 * should be associated with the last RDMA operation for any
 * data buffer transfer.
 */
static void
srpt_ch_data_comp(srpt_channel_t *ch, stmf_data_buf_t *stmf_dbuf,
	ibt_wc_status_t wc_status)
{
	srpt_ds_dbuf_t		*dbuf;
	srpt_iu_t		*iu;
	stmf_status_t		status;

	ASSERT(stmf_dbuf != NULL);

	dbuf = (srpt_ds_dbuf_t *)stmf_dbuf->db_port_private;

	ASSERT(dbuf != NULL);

	iu = dbuf->db_iu;

	ASSERT(iu != NULL);
	ASSERT(iu->iu_ch == ch);

	/*
	 * If the work completion indicates a non-flush failure, then
	 * start a channel disconnect (asynchronous) and release
	 * the reference to the IU.  The task will be cleaned
	 * up with STMF during channel shutdown processing.
	 */
	if (wc_status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_data_comp, WC status err(%d)",
		    wc_status);
		if (wc_status != IBT_WC_WR_FLUSHED_ERR) {
			srpt_ch_disconnect(ch);
		}
		atomic_dec_32(&iu->iu_sq_posted_cnt);
		return;
	}

	/*
	 * If STMF has requested this task be aborted, then if this is the
	 * last I/O operation outstanding, notify STMF the task has been
	 * aborted and ignore the completion.
	 */
	mutex_enter(&iu->iu_lock);
	atomic_dec_32(&iu->iu_sq_posted_cnt);

	if ((iu->iu_flags & SRPT_IU_STMF_ABORTING) != 0) {
		scsi_task_t *abort_task = iu->iu_stmf_task;

		mutex_exit(&iu->iu_lock);
		stmf_abort(STMF_REQUEUE_TASK_ABORT_LPORT, abort_task,
		    STMF_ABORTED, NULL);
		return;
	}

	/*
	 * We should not get an RDMA completion where the task has already
	 * completed aborting and STMF has been informed.
	 */
	ASSERT((iu->iu_flags & SRPT_IU_ABORTED) == 0);

	/*
	 * Good completion for the last RDMA op associated with a data
	 * buffer I/O; if requested, initiate the status send, otherwise
	 * let STMF know we are done.
	 */
	stmf_dbuf->db_xfer_status = STMF_SUCCESS;
	mutex_exit(&iu->iu_lock);

	DTRACE_SRP_8(xfer__done, srpt_channel_t, ch,
	    ibt_wr_ds_t, &(dbuf->db_sge), srpt_iu_t, iu,
	    ibt_send_wr_t, 0, uint32_t, stmf_dbuf->db_data_size,
	    uint32_t, 0, uint32_t, 0,
	    uint32_t, (stmf_dbuf->db_flags & DB_DIRECTION_TO_RPORT) ? 1 : 0);

	if ((stmf_dbuf->db_flags & DB_SEND_STATUS_GOOD) != 0) {
		status = srpt_stp_send_status(dbuf->db_iu->iu_stmf_task, 0);
		if (status == STMF_SUCCESS) {
			return;
		}
		stmf_dbuf->db_xfer_status = STMF_FAILURE;
	}
	stmf_data_xfer_done(dbuf->db_iu->iu_stmf_task, stmf_dbuf, 0);
}
/*
 * Return value:
 *	1 - exitlwps() failed, call (or continue) lwp_exit()
 *	0 - restarting init.  Return through system call path
 */
int
proc_exit(int why, int what)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	zone_t *z = p->p_zone;
	timeout_id_t tmp_id;
	int rv;
	proc_t *q;
	task_t *tk;
	vnode_t *exec_vp, *execdir_vp, *cdir, *rdir;
	sigqueue_t *sqp;
	lwpdir_t *lwpdir;
	uint_t lwpdir_sz;
	tidhash_t *tidhash;
	uint_t tidhash_sz;
	ret_tidhash_t *ret_tidhash;
	refstr_t *cwd;
	hrtime_t hrutime, hrstime;
	int evaporate;
	brand_t *orig_brand = NULL;
	void *brand_data = NULL;

	/*
	 * Stop and discard the process's lwps except for the current one,
	 * unless some other lwp beat us to it.  If exitlwps() fails then
	 * return and the calling lwp will call (or continue in) lwp_exit().
	 */
	proc_is_exiting(p);
	if (exitlwps(0) != 0)
		return (1);

	mutex_enter(&p->p_lock);
	if (p->p_ttime > 0) {
		/*
		 * Account any remaining ticks charged to this process
		 * on its way out.
		 */
		(void) task_cpu_time_incr(p->p_task, p->p_ttime);
		p->p_ttime = 0;
	}
	mutex_exit(&p->p_lock);

	DTRACE_PROC(lwp__exit);
	DTRACE_PROC1(exit, int, why);

	/*
	 * Will perform any brand specific proc exit processing.  Since this
	 * is always the last lwp, will also perform lwp_exit and free
	 * brand_data, except in the case that the brand has a b_exit_with_sig
	 * handler.  In this case we free the brand_data later within this
	 * function.
	 */
	mutex_enter(&p->p_lock);
	if (PROC_IS_BRANDED(p)) {
		orig_brand = p->p_brand;
		if (p->p_brand_data != NULL && orig_brand->b_data_size > 0) {
			brand_data = p->p_brand_data;
		}
		lwp_detach_brand_hdlrs(lwp);
		brand_clearbrand(p, B_FALSE);
	}
	mutex_exit(&p->p_lock);

	/*
	 * Don't let init exit unless zone_start_init() failed its exec, or
	 * we are shutting down the zone or the machine.
	 *
	 * Since we are single threaded, we don't need to lock the
	 * following accesses to zone_proc_initpid.
	 */
	if (p->p_pid == z->zone_proc_initpid) {
		if (z->zone_boot_err == 0 &&
		    zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
		    zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) {
			if (z->zone_restart_init == B_TRUE) {
				if (restart_init(what, why) == 0)
					return (0);
			}

			z->zone_init_status = wstat(why, what);
			(void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, CRED());
		}

		/*
		 * Since we didn't or couldn't restart init, we clear
		 * the zone's init state and proceed with exit
		 * processing.
		 */
		z->zone_proc_initpid = -1;
	}

	lwp_pcb_exit();

	/*
	 * Allocate a sigqueue now, before we grab locks.
	 * It will be given to sigcld(), below.
	 * Special case:  If we will be making the process disappear
	 * without a trace because it is either:
	 *	* an exiting SSYS process, or
	 *	* a posix_spawn() vfork child who requests it,
	 * we don't bother to allocate a useless sigqueue.
	 */
	evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) &&
	    why == CLD_EXITED && what == _EVAPORATE);
	if (!evaporate)
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);

	/*
	 * revoke any doors created by the process.
	 */
	if (p->p_door_list)
		door_exit();

	/*
	 * Release schedctl data structures.
	 */
	if (p->p_pagep)
		schedctl_proc_cleanup();

	/*
	 * make sure all pending kaio has completed.
	 */
	if (p->p_aio)
		aio_cleanup_exit();

	/*
	 * discard the lwpchan cache.
	 */
	if (p->p_lcp != NULL)
		lwpchan_destroy_cache(0);

	/*
	 * Clean up any DTrace helper actions or probes for the process.
	 */
	if (p->p_dtrace_helpers != NULL) {
		ASSERT(dtrace_helpers_cleanup != NULL);
		(*dtrace_helpers_cleanup)();
	}

	/* untimeout the realtime timers */
	if (p->p_itimer != NULL)
		timer_exit();

	if ((tmp_id = p->p_alarmid) != 0) {
		p->p_alarmid = 0;
		(void) untimeout(tmp_id);
	}

	/*
	 * Remove any fpollinfo_t's for this (last) thread from our file
	 * descriptors so closeall() can ASSERT() that they're all gone.
	 */
	pollcleanup();

	if (p->p_rprof_cyclic != CYCLIC_NONE) {
		mutex_enter(&cpu_lock);
		cyclic_remove(p->p_rprof_cyclic);
		mutex_exit(&cpu_lock);
	}

	mutex_enter(&p->p_lock);

	/*
	 * Clean up any DTrace probes associated with this process.
	 */
	if (p->p_dtrace_probes) {
		ASSERT(dtrace_fasttrap_exit_ptr != NULL);
		dtrace_fasttrap_exit_ptr(p);
	}

	while ((tmp_id = p->p_itimerid) != 0) {
		p->p_itimerid = 0;
		mutex_exit(&p->p_lock);
		(void) untimeout(tmp_id);
		mutex_enter(&p->p_lock);
	}

	lwp_cleanup();

	/*
	 * We are about to exit; prevent our resource associations from
	 * being changed.
	 */
	pool_barrier_enter();

	/*
	 * Block the process against /proc now that we have really
	 * acquired p->p_lock (to manipulate p_tlist at least).
	 */
	prbarrier(p);

	sigfillset(&p->p_ignore);
	sigemptyset(&p->p_siginfo);
	sigemptyset(&p->p_sig);
	sigemptyset(&p->p_extsig);
	sigemptyset(&t->t_sig);
	sigemptyset(&t->t_extsig);
	sigemptyset(&p->p_sigmask);
	sigdelq(p, t, 0);
	lwp->lwp_cursig = 0;
	lwp->lwp_extsig = 0;
	p->p_flag &= ~(SKILLED | SEXTKILLED);
	if (lwp->lwp_curinfo) {
		siginfofree(lwp->lwp_curinfo);
		lwp->lwp_curinfo = NULL;
	}

	t->t_proc_flag |= TP_LWPEXIT;
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
	prlwpexit(t);		/* notify /proc */
	lwp_hash_out(p, t->t_tid);
	prexit(p);

	p->p_lwpcnt = 0;
	p->p_tlist = NULL;
	sigqfree(p);
	term_mstate(t);
	p->p_mterm = gethrtime();

	exec_vp = p->p_exec;
	execdir_vp = p->p_execdir;
	p->p_exec = NULLVP;
	p->p_execdir = NULLVP;
	mutex_exit(&p->p_lock);

	pr_free_watched_pages(p);

	closeall(P_FINFO(p));

	/* Free the controlling tty.  (freectty() always assumes curproc.) */
	ASSERT(p == curproc);
	(void) freectty(B_TRUE);

#if defined(__sparc)
	if (p->p_utraps != NULL)
		utrap_free(p);
#endif
	if (p->p_semacct)			/* IPC semaphore exit */
		semexit(p);
	rv = wstat(why, what);

	acct(rv & 0xff);
	exacct_commit_proc(p, rv);

	/*
	 * Release any resources associated with C2 auditing
	 */
	if (AU_AUDITING()) {
		/*
		 * audit exit system call
		 */
		audit_exit(why, what);
	}

	/*
	 * Free address space.
	 */
	relvm();

	if (exec_vp) {
		/*
		 * Close this executable which has been opened when the process
		 * was created by getproc().
		 */
		(void) VOP_CLOSE(exec_vp, FREAD, 1, (offset_t)0, CRED(), NULL);
		VN_RELE(exec_vp);
	}
	if (execdir_vp)
		VN_RELE(execdir_vp);

	/*
	 * Release held contracts.
	 */
	contract_exit(p);

	/*
	 * Depart our encapsulating process contract.
	 */
	if ((p->p_flag & SSYS) == 0) {
		ASSERT(p->p_ct_process);
		contract_process_exit(p->p_ct_process, p, rv);
	}

	/*
	 * Remove pool association, and block if requested by pool_do_bind.
	 */
	mutex_enter(&p->p_lock);
	ASSERT(p->p_pool->pool_ref > 0);
	atomic_dec_32(&p->p_pool->pool_ref);
	p->p_pool = pool_default;
	/*
	 * Now that our address space has been freed and all other threads
	 * in this process have exited, set the PEXITED pool flag.  This
	 * tells the pools subsystems to ignore this process if it was
	 * requested to rebind this process to a new pool.
	 */
	p->p_poolflag |= PEXITED;
	pool_barrier_exit();
	mutex_exit(&p->p_lock);

	mutex_enter(&pidlock);

	/*
	 * Delete this process from the newstate list of its parent.  We
	 * will put it in the right place in the sigcld in the end.
	 */
	delete_ns(p->p_parent, p);

	/*
	 * Reassign the orphans to the next of kin.
	 * Don't rearrange init's orphanage.
	 */
	if ((q = p->p_orphan) != NULL && p != proc_init) {

		proc_t *nokp = p->p_nextofkin;

		for (;;) {
			q->p_nextofkin = nokp;
			if (q->p_nextorph == NULL)
				break;
			q = q->p_nextorph;
		}
		q->p_nextorph = nokp->p_orphan;
		nokp->p_orphan = p->p_orphan;
		p->p_orphan = NULL;
	}

	/*
	 * Reassign the children to init.
	 * Don't try to assign init's children to init.
	 */
	if ((q = p->p_child) != NULL && p != proc_init) {
		struct proc	*np;
		struct proc	*initp = proc_init;
		pid_t		zone_initpid = 1;
		struct proc	*zoneinitp = NULL;
		boolean_t	setzonetop = B_FALSE;

		if (!INGLOBALZONE(curproc)) {
			zone_initpid = curproc->p_zone->zone_proc_initpid;

			ASSERT(MUTEX_HELD(&pidlock));
			zoneinitp = prfind(zone_initpid);
			if (zoneinitp != NULL) {
				initp = zoneinitp;
			} else {
				zone_initpid = 1;
				setzonetop = B_TRUE;
			}
		}

		pgdetach(p);

		do {
			np = q->p_sibling;
			/*
			 * Delete it from its current parent new state
			 * list and add it to init new state list
			 */
			delete_ns(q->p_parent, q);

			q->p_ppid = zone_initpid;

			q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
			if (setzonetop) {
				mutex_enter(&q->p_lock);
				q->p_flag |= SZONETOP;
				mutex_exit(&q->p_lock);
			}
			q->p_parent = initp;

			/*
			 * Since q will be the first child,
			 * it will not have a previous sibling.
			 */
			q->p_psibling = NULL;
			if (initp->p_child) {
				initp->p_child->p_psibling = q;
			}
			q->p_sibling = initp->p_child;
			initp->p_child = q;
			if (q->p_proc_flag & P_PR_PTRACE) {
				mutex_enter(&q->p_lock);
				sigtoproc(q, NULL, SIGKILL);
				mutex_exit(&q->p_lock);
			}
			/*
			 * sigcld() will add the child to parents
			 * newstate list.
			 */
			if (q->p_stat == SZOMB)
				sigcld(q, NULL);
		} while ((q = np) != NULL);

		p->p_child = NULL;
		ASSERT(p->p_child_ns == NULL);
	}

	TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p);

	mutex_enter(&p->p_lock);
	CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */

	/*
	 * Have our task accumulate our resource usage data before they
	 * become contaminated by p_cacct etc., and before we renounce
	 * membership of the task.
	 *
	 * We do this regardless of whether or not task accounting is active.
	 * This is to avoid having nonsense data reported for this task if
	 * task accounting is subsequently enabled.  The overhead is minimal;
	 * by this point, this process has accounted for the usage of all its
	 * LWPs.  We nonetheless do the work here, and under the protection of
	 * pidlock, so that the movement of the process's usage to the task
	 * happens at the same time as the removal of the process from the
	 * task, from the point of view of exacct_snapshot_task_usage().
	 */
	exacct_update_task_mstate(p);

	hrutime = mstate_aggr_state(p, LMS_USER);
	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
	p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime;
	p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime;

	p->p_acct[LMS_USER] += p->p_cacct[LMS_USER];
	p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM];
	p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP];
	p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT];
	p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT];
	p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT];
	p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK];
	p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP];
	p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU];
	p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED];

	p->p_ru.minflt += p->p_cru.minflt;
	p->p_ru.majflt += p->p_cru.majflt;
	p->p_ru.nswap += p->p_cru.nswap;
	p->p_ru.inblock += p->p_cru.inblock;
	p->p_ru.oublock += p->p_cru.oublock;
	p->p_ru.msgsnd += p->p_cru.msgsnd;
	p->p_ru.msgrcv += p->p_cru.msgrcv;
	p->p_ru.nsignals += p->p_cru.nsignals;
	p->p_ru.nvcsw += p->p_cru.nvcsw;
	p->p_ru.nivcsw += p->p_cru.nivcsw;
	p->p_ru.sysc += p->p_cru.sysc;
	p->p_ru.ioch += p->p_cru.ioch;

	p->p_stat = SZOMB;
	p->p_proc_flag &= ~P_PR_PTRACE;
	p->p_wdata = what;
	p->p_wcode = (char)why;

	cdir = PTOU(p)->u_cdir;
	rdir = PTOU(p)->u_rdir;
	cwd = PTOU(p)->u_cwd;

	ASSERT(cdir != NULL || p->p_parent == &p0);

	/*
	 * Release resource controls, as they are no longer enforceable.
	 */
	rctl_set_free(p->p_rctls);

	/*
	 * Decrement tk_nlwps counter for our task.max-lwps resource control.
	 * An extended accounting record, if that facility is active, is
	 * scheduled to be written.  We cannot give up task and project
	 * membership at this point because that would allow zombies to escape
	 * from the max-processes resource controls.  Zombies stay in their
	 * current task and project until the process table slot is released
	 * in freeproc().
	 */
	tk = p->p_task;

	mutex_enter(&p->p_zone->zone_nlwps_lock);
	tk->tk_nlwps--;
	tk->tk_proj->kpj_nlwps--;
	p->p_zone->zone_nlwps--;
	mutex_exit(&p->p_zone->zone_nlwps_lock);

	/*
	 * Clear the lwp directory and the lwpid hash table
	 * now that /proc can't bother us any more.
	 * We free the memory below, after dropping p->p_lock.
	 */
	lwpdir = p->p_lwpdir;
	lwpdir_sz = p->p_lwpdir_sz;
	tidhash = p->p_tidhash;
	tidhash_sz = p->p_tidhash_sz;
	ret_tidhash = p->p_ret_tidhash;
	p->p_lwpdir = NULL;
	p->p_lwpfree = NULL;
	p->p_lwpdir_sz = 0;
	p->p_tidhash = NULL;
	p->p_tidhash_sz = 0;
	p->p_ret_tidhash = NULL;

	/*
	 * If the process has context ops installed, call the exit routine
	 * on behalf of this last remaining thread.  Normally exitpctx() is
	 * called during thread_exit() or lwp_exit(), but because this is the
	 * last thread in the process, we must call it here.  By the time
	 * thread_exit() is called (below), the association with the relevant
	 * process has been lost.
	 *
	 * We also free the context here.
	 */
	if (p->p_pctx) {
		kpreempt_disable();
		exitpctx(p);
		kpreempt_enable();

		freepctx(p, 0);
	}

	/*
	 * curthread's proc pointer is changed to point to the 'sched'
	 * process for the corresponding zone, except in the case when
	 * the exiting process is in fact a zsched instance, in which
	 * case the proc pointer is set to p0.  We do so, so that the
	 * process still points at the right zone when we call the VN_RELE()
	 * below.
	 *
	 * This is because curthread's original proc pointer can be freed as
	 * soon as the child sends a SIGCLD to its parent.  We use zsched so
	 * that for user processes, even in the final moments of death, the
	 * process is still associated with its zone.
	 */
	if (p != t->t_procp->p_zone->zone_zsched)
		t->t_procp = t->t_procp->p_zone->zone_zsched;
	else
		t->t_procp = &p0;

	mutex_exit(&p->p_lock);
	if (!evaporate) {
		/*
		 * The brand specific code only happens when the brand has a
		 * function to call in place of sigcld, the data itself still
		 * existed, and the parent of the exiting process is not the
		 * global zone init.  If the parent is the global zone init,
		 * then the process was reparented, and we don't want brand
		 * code delivering possibly strange signals to init.  Also,
		 * init is not branded, so any brand specific exit data will
		 * not be picked up by init anyway.
		 * It is assumed by this code that any brand where
		 * b_exit_with_sig == NULL, will free its own brand_data rather
		 * than letting this piece of code free it.
		 */
		if (orig_brand != NULL &&
		    orig_brand->b_ops->b_exit_with_sig != NULL &&
		    brand_data != NULL && p->p_ppid != 1) {
			/*
			 * The code for _fini that could unload the brand_t
			 * blocks until the count of zones using the module
			 * reaches zero.  Zones decrement the refcount on their
			 * brands only after all user tasks in that zone have
			 * exited and been waited on.  The decrement on the
			 * brand's refcount happen in zone_destroy().  That
			 * depends on zone_shutdown() having been completed.
			 * zone_shutdown() includes a call to zone_empty(),
			 * where the zone waits for itself to reach the state
			 * ZONE_IS_EMPTY.  This state is only set in either
			 * zone_shutdown(), when there are no user processes as
			 * the zone enters this function, or in
			 * zone_task_rele().  zone_task_rele() is called from
			 * code triggered by waiting on processes, not by the
			 * processes exiting through proc_exit().  This means
			 * all the branded processes that could exist for a
			 * specific brand_t must exit and get reaped before the
			 * refcount on the brand_t can reach 0.  _fini will
			 * never unload the corresponding brand module before
			 * proc_exit finishes execution for all processes
			 * branded with a particular brand_t, which makes the
			 * operation below safe to do.  Brands that wish to use
			 * this mechanism must wait in _fini as described
			 * above.
			 */
			orig_brand->b_ops->b_exit_with_sig(p, sqp, brand_data);
		} else {
			p->p_pidflag &= ~CLDPEND;
			sigcld(p, sqp);
		}
		if (brand_data != NULL) {
			kmem_free(brand_data, orig_brand->b_data_size);
			brand_data = NULL;
			orig_brand = NULL;
		}
	} else {
		/*
		 * Do what sigcld() would do if the disposition
		 * of the SIGCHLD signal were set to be ignored.
		 */
		cv_broadcast(&p->p_srwchan_cv);
		freeproc(p);
	}
	mutex_exit(&pidlock);

	/*
	 * We don't release u_cdir and u_rdir until SZOMB is set.
	 * This protects us against dofusers().
	 */
	if (cdir)
		VN_RELE(cdir);
	if (rdir)
		VN_RELE(rdir);
	if (cwd)
		refstr_rele(cwd);

	/*
	 * task_rele() may ultimately cause the zone to go away (or
	 * may cause the last user process in a zone to go away, which
	 * signals zsched to go away).  So prior to this call, we must
	 * no longer point at zsched.
	 */
	t->t_procp = &p0;

	kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t));
	kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t));
	while (ret_tidhash != NULL) {
		ret_tidhash_t *next = ret_tidhash->rth_next;
		kmem_free(ret_tidhash->rth_tidhash,
		    ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t));
		kmem_free(ret_tidhash, sizeof (*ret_tidhash));
		ret_tidhash = next;
	}

	thread_exit();
	/* NOTREACHED */
}
/* ARGSUSED */
int
mac_register(mac_register_t *mregp, mac_handle_t *mhp)
{
	mac_impl_t		*mip;
	mactype_t		*mtype;
	int			err = EINVAL;
	struct devnames		*dnp = NULL;
	uint_t			instance;
	boolean_t		style1_created = B_FALSE;
	boolean_t		style2_created = B_FALSE;
	char			*driver;
	minor_t			minor = 0;

	/* A successful call to mac_init_ops() sets the DN_GLDV3_DRIVER flag. */
	if (!GLDV3_DRV(ddi_driver_major(mregp->m_dip)))
		return (EINVAL);

	/* Find the required MAC-Type plugin. */
	if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL)
		return (EINVAL);

	/* Create a mac_impl_t to represent this MAC. */
	mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);

	/*
	 * The mac is not ready for open yet.
	 */
	mip->mi_state_flags |= MIS_DISABLED;

	/*
	 * When a mac is registered, the m_instance field can be set to:
	 *
	 *  0:	Get the mac's instance number from m_dip.
	 *	This is usually used for physical device dips.
	 *
	 *  [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
	 *	For example, when an aggregation is created with the key option,
	 *	"key" will be used as the instance number.
	 *
	 *  -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
	 *	This is often used when a MAC of a virtual link is registered
	 *	(e.g., aggregation when "key" is not specified, or vnic).
	 *
	 * Note that the instance number is used to derive the mi_minor field
	 * of mac_impl_t, which will then be used to derive the name of kstats
	 * and the devfs nodes.  The first 2 cases are needed to preserve
	 * backward compatibility.
	 */
	switch (mregp->m_instance) {
	case 0:
		instance = ddi_get_instance(mregp->m_dip);
		break;
	case ((uint_t)-1):
		minor = mac_minor_hold(B_TRUE);
		if (minor == 0) {
			err = ENOSPC;
			goto fail;
		}
		instance = minor - 1;
		break;
	default:
		instance = mregp->m_instance;
		if (instance >= MAC_MAX_MINOR) {
			err = EINVAL;
			goto fail;
		}
		break;
	}

	mip->mi_minor = (minor_t)(instance + 1);
	mip->mi_dip = mregp->m_dip;
	mip->mi_clients_list = NULL;
	mip->mi_nclients = 0;

	/* Set the default IEEE Port VLAN Identifier */
	mip->mi_pvid = 1;

	/* Default bridge link learning protection values */
	mip->mi_llimit = 1000;
	mip->mi_ldecay = 200;

	driver = (char *)ddi_driver_name(mip->mi_dip);

	/* Construct the MAC name as <drvname><instance> */
	(void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
	    driver, instance);

	mip->mi_driver = mregp->m_driver;

	mip->mi_type = mtype;
	mip->mi_margin = mregp->m_margin;
	mip->mi_info.mi_media = mtype->mt_type;
	mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
	if (mregp->m_max_sdu <= mregp->m_min_sdu)
		goto fail;
	if (mregp->m_multicast_sdu == 0)
		mregp->m_multicast_sdu = mregp->m_max_sdu;
	if (mregp->m_multicast_sdu < mregp->m_min_sdu ||
	    mregp->m_multicast_sdu > mregp->m_max_sdu)
		goto fail;
	mip->mi_sdu_min = mregp->m_min_sdu;
	mip->mi_sdu_max = mregp->m_max_sdu;
	mip->mi_sdu_multicast = mregp->m_multicast_sdu;
	mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
	/*
	 * If the media supports a broadcast address, cache a pointer to it
	 * in the mac_info_t so that upper layers can use it.
	 */
	mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;

	mip->mi_v12n_level = mregp->m_v12n;

	/*
	 * Copy the unicast source address into the mac_info_t, but only if
	 * the MAC-Type defines a non-zero address length.  We need to
	 * handle MAC-Types that have an address length of 0
	 * (point-to-point protocol MACs for example).
	 */
	if (mip->mi_type->mt_addr_length > 0) {
		if (mregp->m_src_addr == NULL)
			goto fail;
		mip->mi_info.mi_unicst_addr =
		    kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
		bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
		    mip->mi_type->mt_addr_length);

		/*
		 * Copy the fixed 'factory' MAC address from the immutable
		 * info.  This is taken to be the MAC address currently in
		 * use.
		 */
		bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
		    mip->mi_type->mt_addr_length);

		/*
		 * At this point, we should set up the classification
		 * rules etc but we delay it till mac_open() so that
		 * the resource discovery has taken place and we
		 * know someone wants to use the device.  Otherwise
		 * memory gets allocated for Rx ring structures even
		 * during probe.
		 */

		/* Copy the destination address if one is provided. */
		if (mregp->m_dst_addr != NULL) {
			bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
			    mip->mi_type->mt_addr_length);
			mip->mi_dstaddr_set = B_TRUE;
		}
	} else if (mregp->m_src_addr != NULL) {
		goto fail;
	}

	/*
	 * The format of the m_pdata is specific to the plugin.  It is
	 * passed in as an argument to all of the plugin callbacks.  The
	 * driver can update this information by calling
	 * mac_pdata_update().
	 */
	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY) {
		/*
		 * Verify if the supplied plugin data is valid.  Note that
		 * even if the caller passed in a NULL pointer as plugin data,
		 * we still need to verify if that's valid as the plugin may
		 * require plugin data to function.
		 */
		if (!mip->mi_type->mt_ops.mtops_pdata_verify(
		    mregp->m_pdata, mregp->m_pdata_size)) {
			goto fail;
		}
		if (mregp->m_pdata != NULL) {
			mip->mi_pdata =
			    kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
			bcopy(mregp->m_pdata, mip->mi_pdata,
			    mregp->m_pdata_size);
			mip->mi_pdata_size = mregp->m_pdata_size;
		}
	} else if (mregp->m_pdata != NULL) {
		/*
		 * The caller supplied non-NULL plugin data, but the plugin
		 * does not recognize plugin data.
		 */
		err = EINVAL;
		goto fail;
	}

	/*
	 * Register the private properties.
	 */
	mac_register_priv_prop(mip, mregp->m_priv_props);

	/*
	 * Stash the driver callbacks into the mac_impl_t, but first sanity
	 * check to make sure all mandatory callbacks are set.
	 */
	if (mregp->m_callbacks->mc_getstat == NULL ||
	    mregp->m_callbacks->mc_start == NULL ||
	    mregp->m_callbacks->mc_stop == NULL ||
	    mregp->m_callbacks->mc_setpromisc == NULL ||
	    mregp->m_callbacks->mc_multicst == NULL) {
		goto fail;
	}
	mip->mi_callbacks = mregp->m_callbacks;

	if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY,
	    &mip->mi_capab_legacy)) {
		mip->mi_state_flags |= MIS_LEGACY;
		mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev;
	} else {
		mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
		    mip->mi_minor);
	}

	/*
	 * Allocate a notification thread.  thread_create blocks for memory
	 * if needed, it never fails.
	 */
	mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
	    mip, 0, &p0, TS_RUN, minclsyspri);

	/*
	 * Initialize the capabilities
	 */
	bzero(&mip->mi_rx_rings_cap, sizeof (mac_capab_rings_t));
	bzero(&mip->mi_tx_rings_cap, sizeof (mac_capab_rings_t));

	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL))
		mip->mi_state_flags |= MIS_IS_VNIC;

	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL))
		mip->mi_state_flags |= MIS_IS_AGGR;

	mac_addr_factory_init(mip);

	/*
	 * Enforce the virtualization level registered.
	 */
	if (mip->mi_v12n_level & MAC_VIRT_LEVEL1) {
		if (mac_init_rings(mip, MAC_RING_TYPE_RX) != 0 ||
		    mac_init_rings(mip, MAC_RING_TYPE_TX) != 0)
			goto fail;

		/*
		 * The driver needs to register at least rx rings for this
		 * virtualization level.
		 */
		if (mip->mi_rx_groups == NULL)
			goto fail;
	}

	/*
	 * The driver must set mc_unicst entry point to NULL when it advertises
	 * CAP_RINGS for rx groups.
	 */
	if (mip->mi_rx_groups != NULL) {
		if (mregp->m_callbacks->mc_unicst != NULL)
			goto fail;
	} else {
		if (mregp->m_callbacks->mc_unicst == NULL)
			goto fail;
	}

	/*
	 * Initialize MAC addresses.  Must be called after mac_init_rings().
	 */
	mac_init_macaddr(mip);

	mip->mi_share_capab.ms_snum = 0;
	if (mip->mi_v12n_level & MAC_VIRT_HIO) {
		(void) mac_capab_get((mac_handle_t)mip, MAC_CAPAB_SHARES,
		    &mip->mi_share_capab);
	}

	/*
	 * Initialize the kstats for this device.
	 */
	mac_driver_stat_create(mip);

	/* Zero out any properties. */
	bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t));

	if (mip->mi_minor <= MAC_MAX_MINOR) {
		/* Create a style-2 DLPI device */
		if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0,
		    DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
			goto fail;
		style2_created = B_TRUE;

		/* Create a style-1 DLPI device */
		if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR,
		    mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS)
			goto fail;
		style1_created = B_TRUE;
	}

	mac_flow_l2tab_create(mip, &mip->mi_flow_tab);

	rw_enter(&i_mac_impl_lock, RW_WRITER);
	if (mod_hash_insert(i_mac_impl_hash,
	    (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
		rw_exit(&i_mac_impl_lock);
		err = EEXIST;
		goto fail;
	}

	DTRACE_PROBE2(mac__register, struct devnames *, dnp,
	    (mac_impl_t *), mip);

	/*
	 * Mark the MAC to be ready for open.
	 */
	mip->mi_state_flags &= ~MIS_DISABLED;
	rw_exit(&i_mac_impl_lock);

	atomic_inc_32(&i_mac_impl_count);

	cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
	*mhp = (mac_handle_t)mip;
	return (0);

fail:
	if (style1_created)
		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);

	if (style2_created)
		ddi_remove_minor_node(mip->mi_dip, driver);

	mac_addr_factory_fini(mip);

	/* Clean up registered MAC addresses */
	mac_fini_macaddr(mip);

	/* Clean up registered rings */
	mac_free_rings(mip, MAC_RING_TYPE_RX);
	mac_free_rings(mip, MAC_RING_TYPE_TX);

	/* Clean up notification thread */
	if (mip->mi_notify_thread != NULL)
		i_mac_notify_exit(mip);

	if (mip->mi_info.mi_unicst_addr != NULL) {
		kmem_free(mip->mi_info.mi_unicst_addr,
		    mip->mi_type->mt_addr_length);
		mip->mi_info.mi_unicst_addr = NULL;
	}

	mac_driver_stat_delete(mip);

	if (mip->mi_type != NULL) {
		atomic_dec_32(&mip->mi_type->mt_ref);
		mip->mi_type = NULL;
	}

	if (mip->mi_pdata != NULL) {
		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
		mip->mi_pdata = NULL;
		mip->mi_pdata_size = 0;
	}

	if (minor != 0) {
		ASSERT(minor > MAC_MAX_MINOR);
		mac_minor_rele(minor);
	}

	mip->mi_state_flags = 0;
	mac_unregister_priv_prop(mip);

	/*
	 * Clear the state before destroying the mac_impl_t
	 */
	mip->mi_state_flags = 0;

	kmem_cache_free(i_mac_impl_cachep, mip);
	return (err);
}
/*
 * Unregister from the GLDv3 framework
 */
int
mac_unregister(mac_handle_t mh)
{
	int			err;
	mac_impl_t		*mip = (mac_impl_t *)mh;
	mod_hash_val_t		val;
	mac_margin_req_t	*mmr, *nextmmr;

	/* Fail the unregister if there are any open references to this mac. */
	if ((err = mac_disable_nowait(mh)) != 0)
		return (err);

	/*
	 * Clean up notification thread and wait for it to exit.
	 */
	i_mac_notify_exit(mip);

	i_mac_perim_enter(mip);

	/*
	 * There are still resource properties configured on this mac.
	 */
	if (mip->mi_resource_props.mrp_mask != 0)
		mac_fastpath_enable((mac_handle_t)mip);

	if (mip->mi_minor < MAC_MAX_MINOR + 1) {
		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
		ddi_remove_minor_node(mip->mi_dip,
		    (char *)ddi_driver_name(mip->mi_dip));
	}

	ASSERT(mip->mi_nactiveclients == 0 &&
	    !(mip->mi_state_flags & MIS_EXCLUSIVE));

	mac_driver_stat_delete(mip);

	(void) mod_hash_remove(i_mac_impl_hash,
	    (mod_hash_key_t)mip->mi_name, &val);
	ASSERT(mip == (mac_impl_t *)val);

	ASSERT(i_mac_impl_count > 0);
	atomic_dec_32(&i_mac_impl_count);

	if (mip->mi_pdata != NULL)
		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
	mip->mi_pdata = NULL;
	mip->mi_pdata_size = 0;

	/*
	 * Free the list of margin requests.
	 */
	for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) {
		nextmmr = mmr->mmr_nextp;
		kmem_free(mmr, sizeof (mac_margin_req_t));
	}
	mip->mi_mmrp = NULL;

	mip->mi_linkstate = mip->mi_lowlinkstate = LINK_STATE_UNKNOWN;
	kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
	mip->mi_info.mi_unicst_addr = NULL;

	atomic_dec_32(&mip->mi_type->mt_ref);
	mip->mi_type = NULL;

	/*
	 * Free the primary MAC address.
	 */
	mac_fini_macaddr(mip);

	/*
	 * free all rings
	 */
	mac_free_rings(mip, MAC_RING_TYPE_RX);
	mac_free_rings(mip, MAC_RING_TYPE_TX);

	mac_addr_factory_fini(mip);

	bzero(mip->mi_addr, MAXMACADDRLEN);
	bzero(mip->mi_dstaddr, MAXMACADDRLEN);
	mip->mi_dstaddr_set = B_FALSE;

	/* and the flows */
	mac_flow_tab_destroy(mip->mi_flow_tab);
	mip->mi_flow_tab = NULL;

	if (mip->mi_minor > MAC_MAX_MINOR)
		mac_minor_rele(mip->mi_minor);

	cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);

	/*
	 * Reset the perim related fields to default values before
	 * kmem_cache_free
	 */
	i_mac_perim_exit(mip);
	mip->mi_state_flags = 0;

	mac_unregister_priv_prop(mip);

	ASSERT(mip->mi_bridge_link == NULL);
	kmem_cache_free(i_mac_impl_cachep, mip);

	return (0);
}