/*
 * h_put - Insert a key into the hash set.
 * Returns the existing key if an equal key is already present (nothing is
 * inserted); returns NULL after a successful insertion.
 */
const void *
h_put(HASHSET h, const void *key)
{
	uint_t hash = h->h_hash(key);
	uint_t indx = hash % h->h_tableSize;
	ENTRY *e;

	for (e = h->h_table[indx]; e; e = e->e_next)
		if (e->e_hash == hash && h->h_equal(e->e_key, key))
			return (key);

	if (h->h_count >= h->h_threshold) {
		rehash(h);
		indx = hash % h->h_tableSize;
	}

	e = exmalloc(sizeof (ENTRY));
	e->e_hash = hash;
	e->e_key = (void *)key;
	e->e_next = h->h_table[indx];
	h->h_table[indx] = e;
	h->h_count++;

	DTRACE_PROBE2(mountd, hashset, h->h_count, h->h_loadFactor);

	return (NULL);
}
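/*
 * Illustrative usage sketch (not part of the original source): h_put()
 * returns NULL when the key was newly inserted and returns the key itself
 * when an equal key was already present, so a caller can detect duplicates
 * without a separate lookup.  The HASHSET "h" is assumed to have been
 * created elsewhere by this module's constructor with matching hash and
 * equal callbacks; h_put_is_new() is a hypothetical helper.
 */
static int
h_put_is_new(HASHSET h, const char *name)
{
	/* non-NULL return means "name" (or an equal key) was already there */
	return (h_put(h, name) == NULL);
}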
int
rw_rdlock_impl(rwlock_t *rwlp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	readlock_t *readlockp;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	int error;

	/*
	 * If we already hold a readers lock on this rwlock,
	 * just increment our reference count and return.
	 */
	sigoff(self);
	readlockp = rwl_entry(rwlp);
	if (readlockp->rd_count != 0) {
		if (readlockp->rd_count == READ_LOCK_MAX) {
			sigon(self);
			error = EAGAIN;
			goto out;
		}
		sigon(self);
		error = 0;
		goto out;
	}
	sigon(self);

	/*
	 * If we hold the writer lock, bail out.
	 */
	if (rw_write_held(rwlp)) {
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_rdlock",
			    "calling thread owns the writer lock");
		error = EDEADLK;
		goto out;
	}

	if (read_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, tsp, READ_LOCK);
	else						/* user-level */
		error = rwlock_lock(rwlp, tsp, READ_LOCK);

out:
	if (error == 0) {
		sigoff(self);
		rwl_entry(rwlp)->rd_count++;
		sigon(self);
		if (rwsp)
			tdb_incr(rwsp->rw_rdlock);
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
	} else {
		DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK, error);
	}

	return (error);
}
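/*
 * Illustrative sketch (not part of the original source): the per-thread
 * rd_count bookkeeping above is what makes recursive read acquisition cheap
 * from the caller's point of view -- a second rw_rdlock() on the same rwlock
 * by the same thread only increments a counter.  This assumes the Solaris
 * threads API declared in <synch.h>; recursive_read_example() is a
 * hypothetical caller.
 */
#include <synch.h>

static void
recursive_read_example(rwlock_t *rwp)
{
	(void) rw_rdlock(rwp);	/* first acquisition takes the full path */
	(void) rw_rdlock(rwp);	/* same thread: just bumps rd_count */
	(void) rw_unlock(rwp);
	(void) rw_unlock(rwp);	/* last unlock releases the read lock */
}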
/*
 * This function is invoked for packets received by the MAC driver in
 * interrupt context.  The ring generation number provided by the driver
 * is matched with the ring generation number held in MAC.  If they do not
 * match, received packets are considered stale packets coming from an older
 * assignment of the ring.  Drop them.
 */
void
mac_rx_ring(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp_chain,
    uint64_t mr_gen_num)
{
	mac_ring_t *mr = (mac_ring_t *)mrh;

	if ((mr != NULL) && (mr->mr_gen_num != mr_gen_num)) {
		DTRACE_PROBE2(mac__rx__rings__stale__packet, uint64_t,
		    mr->mr_gen_num, uint64_t, mr_gen_num);
		freemsgchain(mp_chain);
		return;
	}
	mac_rx(mh, (mac_resource_handle_t)mrh, mp_chain);
}
/*
 * Charge project of thread t the time thread t spent on CPU since previously
 * adjusted.
 *
 * Record the current on-CPU time in the csc structure.
 *
 * Do not adjust for more than one tick worth of time.
 *
 * It is possible that the project cap is being disabled while this routine is
 * executed.  This should not cause any issues since the association between
 * the thread and its project is protected by thread lock.
 */
static void
caps_charge_adjust(kthread_id_t t, caps_sc_t *csc)
{
	kproject_t *kpj = ttoproj(t);
	hrtime_t new_usage;
	hrtime_t usage_delta;

	ASSERT(THREAD_LOCK_HELD(t));
	ASSERT(kpj->kpj_cpucap != NULL);

	/* Get on-CPU time since birth of a thread */
	new_usage = mstate_thread_onproc_time(t);

	/* Time spent on CPU since last checked */
	usage_delta = new_usage - csc->csc_cputime;

	/* Save the accumulated on-CPU time */
	csc->csc_cputime = new_usage;

	/* Charge at most one tick worth of on-CPU time */
	if (usage_delta > cap_tick_cost)
		usage_delta = cap_tick_cost;

	/* Add usage_delta to the project usage value. */
	if (usage_delta > 0) {
		cpucap_t *cap = kpj->kpj_cpucap;

		DTRACE_PROBE2(cpucaps__project__charge, kthread_id_t, t,
		    hrtime_t, usage_delta);

		disp_lock_enter_high(&cap->cap_usagelock);
		cap->cap_usage += usage_delta;

		/* Check for overflows */
		if (cap->cap_usage < 0)
			cap->cap_usage = MAX_USAGE - 1;

		disp_lock_exit_high(&cap->cap_usagelock);

		/*
		 * cap_maxusage is only kept for observability.  Move it
		 * outside the lock to reduce the time spent while holding
		 * the lock.
		 */
		if (cap->cap_usage > cap->cap_maxusage)
			cap->cap_maxusage = cap->cap_usage;
	}
}
/*ARGSUSED*/
static void
balloon_handler(struct xenbus_watch *watch, const char **vec, uint_t len)
{
	ulong_t new_target_kb;
	pgcnt_t new_target_pages;
	int rv;
	static uchar_t warning_cnt = 0;

	rv = xenbus_scanf(0, "memory", "target", "%lu", &new_target_kb);
	if (rv != 0) {
		return;
	}

	/* new_target is in kB - change this to pages */
	new_target_pages = kbtop(new_target_kb);

	DTRACE_PROBE1(balloon__new__target, pgcnt_t, new_target_pages);

	/*
	 * Unfortunately, dom0 may give us a target that is larger than
	 * our max limit.  Re-check the limit, and, if the new target is
	 * too large, adjust it downwards.
	 */
	mutex_enter(&bln_mutex);
	if (new_target_pages > bln_stats.bln_max_pages) {
		DTRACE_PROBE2(balloon__target__too__large, pgcnt_t,
		    new_target_pages, pgcnt_t, bln_stats.bln_max_pages);
		if (!DOMAIN_IS_INITDOMAIN(xen_info) || warning_cnt != 0) {
			cmn_err(CE_WARN, "New balloon target (0x%lx pages) is "
			    "larger than original memory size (0x%lx pages). "
			    "Ballooning beyond original memory size is not "
			    "allowed.",
			    new_target_pages, bln_stats.bln_max_pages);
		}
		warning_cnt = 1;
		bln_stats.bln_new_target = bln_stats.bln_max_pages;
	} else {
		bln_stats.bln_new_target = new_target_pages;
	}
	mutex_exit(&bln_mutex);

	cv_signal(&bln_cv);
}
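/*
 * Worked example of the kB-to-pages conversion above (illustrative only):
 * with 4 KB pages, a target of 1048576 kB (1 GB) becomes 262144 pages.
 * The actual kbtop() definition is not shown here; on a system with a
 * power-of-two page size it is typically equivalent to the hypothetical
 * macro below, where PAGESHIFT is the log2 of the page size.
 */
#define	EXAMPLE_KBTOP(kb)	(((kb) << 10) >> PAGESHIFT)	/* kB -> bytes -> pages */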
/*
 * dm2s_transmit - Transmit a message.
 */
int
dm2s_transmit(queue_t *wq, mblk_t *mp, target_id_t target, mkey_t key)
{
	dm2s_t *dm2sp = (dm2s_t *)wq->q_ptr;
	int ret;
	uint32_t len;
	uint32_t numsg;

	DPRINTF(DBG_DRV, ("dm2s_transmit: called\n"));
	ASSERT(dm2sp != NULL);
	ASSERT(MUTEX_HELD(&dm2sp->ms_lock));

	/*
	 * Free the message if the mailbox is not in the connected state.
	 */
	if (!DM2S_MBOX_READY(dm2sp)) {
		DPRINTF(DBG_MBOX, ("dm2s_transmit: mailbox not ready yet\n"));
		freemsg(mp);
		return (EIO);
	}

	len = msgdsize(mp);
	if (len > dm2sp->ms_mtu) {
		/*
		 * Size is too big to send, free the message.
		 */
		DPRINTF(DBG_MBOX, ("dm2s_transmit: message too large\n"));
		DTRACE_PROBE2(dm2s_msg_too_big, dm2s_t, dm2sp, uint32_t, len);
		freemsg(mp);
		return (0);
	}

	if ((ret = dm2s_prep_scatgath(mp, &numsg, dm2sp->ms_sg_tx,
	    DM2S_MAX_SG)) != 0) {
		DPRINTF(DBG_MBOX, ("dm2s_transmit: prep_scatgath failed\n"));
		putbq(wq, mp);
		return (EAGAIN);
	}

	DPRINTF(DBG_MBOX,
	    ("dm2s_transmit: calling mb_putmsg numsg=%d len=%d\n",
	    numsg, len));
	ret = scf_mb_putmsg(target, key, len, numsg, dm2sp->ms_sg_tx, 0);
	if (ret == EBUSY || ret == ENOSPC) {
		DPRINTF(DBG_MBOX,
		    ("dm2s_transmit: mailbox busy ret=%d\n", ret));
		if (++dm2sp->ms_retries >= DM2S_MAX_RETRIES) {
			/*
			 * If maximum retries are reached, then free the
			 * message.
			 */
			DPRINTF(DBG_MBOX,
			    ("dm2s_transmit: freeing msg after max retries\n"));
			DTRACE_PROBE2(dm2s_retry_fail, dm2s_t, dm2sp, int, ret);
			freemsg(mp);
			dm2sp->ms_retries = 0;
			return (0);
		}
		DTRACE_PROBE2(dm2s_mb_busy, dm2s_t, dm2sp, int, ret);
		/*
		 * Queue it back, so that we can retry again.
		 */
		putbq(wq, mp);
		return (ret);
	}
	DMPBYTES("dm2s: Putmsg: ", len, numsg, dm2sp->ms_sg_tx);
	dm2sp->ms_retries = 0;
	freemsg(mp);
	DPRINTF(DBG_DRV, ("dm2s_transmit: ret=%d\n", ret));
	return (ret);
}
/*
 * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
 * and trywrlock for process-private (USYNC_THREAD) rwlocks.
 */
int
rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	queue_head_t *qp;
	ulwp_t *ulwp;
	int try_flag;
	int ignore_waiters_flag;
	int error = 0;

	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);

	if (!try_flag) {
		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
	}

	qp = queue_lock(rwlp, MX);
	/* initial attempt to acquire the lock fails if there are waiters */
	ignore_waiters_flag = 0;
	while (error == 0) {
		if (rd_wr == READ_LOCK) {
			if (read_lock_try(rwlp, ignore_waiters_flag))
				break;
		} else {
			if (write_lock_try(rwlp, ignore_waiters_flag))
				break;
		}
		/* subsequent attempts do not fail due to waiters */
		ignore_waiters_flag = 1;
		atomic_or_32(rwstate, URW_HAS_WAITERS);
		readers = *rwstate;
		ASSERT_CONSISTENT_STATE(readers);
		if ((readers & URW_WRITE_LOCKED) ||
		    (rd_wr == WRITE_LOCK &&
		    (readers & URW_READERS_MASK) != 0))
			/* EMPTY */;	/* somebody holds the lock */
		else if ((ulwp = queue_waiter(qp)) == NULL) {
			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
			ignore_waiters_flag = 0;
			continue;	/* no queued waiters, start over */
		} else {
			/*
			 * Do a priority check on the queued waiter (the
			 * highest priority thread on the queue) to see
			 * if we should defer to him or just grab the lock.
			 */
			int our_pri = real_priority(self);
			int his_pri = real_priority(ulwp);

			if (rd_wr == WRITE_LOCK) {
				/*
				 * We defer to a queued thread that has
				 * a higher priority than ours.
				 */
				if (his_pri <= our_pri) {
					/*
					 * Don't defer, just grab the lock.
					 */
					continue;
				}
			} else {
				/*
				 * We defer to a queued thread that has
				 * a higher priority than ours or that
				 * is a writer whose priority equals ours.
				 */
				if (his_pri < our_pri ||
				    (his_pri == our_pri && !ulwp->ul_writer)) {
					/*
					 * Don't defer, just grab the lock.
					 */
					continue;
				}
			}
		}
		/*
		 * We are about to block.
		 * If we're doing a trylock, return EBUSY instead.
		 */
		if (try_flag) {
			error = EBUSY;
			break;
		}
		/*
		 * Enqueue writers ahead of readers.
		 */
		self->ul_writer = rd_wr;	/* *must* be 0 or 1 */
		enqueue(qp, self, 0);
		set_parking_flag(self, 1);
		queue_unlock(qp);
		if ((error = __lwp_park(tsp, 0)) == EINTR)
			error = 0;
		set_parking_flag(self, 0);
		qp = queue_lock(rwlp, MX);
		if (self->ul_sleepq && dequeue_self(qp) == 0) {
			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
			ignore_waiters_flag = 0;
		}
		self->ul_writer = 0;
		if (rd_wr == WRITE_LOCK &&
		    (*rwstate & URW_WRITE_LOCKED) &&
		    rwlp->rwlock_owner == (uintptr_t)self) {
			/*
			 * We acquired the lock by hand-off
			 * from the previous owner.
			 */
			error = 0;	/* timedlock did not fail */
			break;
		}
	}

	/*
	 * Make one final check to see if there are any threads left
	 * on the rwlock queue.  Clear the URW_HAS_WAITERS flag if not.
	 */
	if (qp->qh_root == NULL || qp->qh_root->qr_head == NULL)
		atomic_and_32(rwstate, ~URW_HAS_WAITERS);

	queue_unlock(qp);

	if (!try_flag) {
		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
	}

	return (error);
}
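/*
 * Reference sketch derived from the checks above (not a verbatim copy of the
 * header): the 32-bit rwstate word packs a reader count and two flag bits,
 * which is why "somebody holds the lock" reduces to the two tests in the
 * loop above.
 *
 *	URW_READERS_MASK	count of readers currently holding the lock
 *	URW_WRITE_LOCKED	a writer owns the lock
 *	URW_HAS_WAITERS		at least one thread is queued on the lock
 */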
/*
 * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
 * and trywrlock for process-shared (USYNC_PROCESS) rwlocks.
 *
 * Note: if the lock appears to be contended we call __lwp_rwlock_rdlock()
 * or __lwp_rwlock_wrlock() holding the mutex.  These return with the mutex
 * released, and if they need to sleep will release the mutex first.  In the
 * event of a spurious wakeup, these will return EAGAIN (because it is much
 * easier for us to re-acquire the mutex here).
 */
int
shared_rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	mutex_t *mp = &rwlp->mutex;
	uint32_t readers;
	int try_flag;
	int error;

	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);

	if (!try_flag) {
		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
	}

	do {
		if (try_flag && (*rwstate & URW_WRITE_LOCKED)) {
			error = EBUSY;
			break;
		}
		if ((error = mutex_lock(mp)) != 0)
			break;
		if (rd_wr == READ_LOCK) {
			if (read_lock_try(rwlp, 0)) {
				(void) mutex_unlock(mp);
				break;
			}
		} else {
			if (write_lock_try(rwlp, 0)) {
				(void) mutex_unlock(mp);
				break;
			}
		}
		atomic_or_32(rwstate, URW_HAS_WAITERS);
		readers = *rwstate;
		ASSERT_CONSISTENT_STATE(readers);
		/*
		 * The calls to __lwp_rwlock_*() below will release the mutex,
		 * so we need a dtrace probe here.  The owner field of the
		 * mutex is cleared in the kernel when the mutex is released,
		 * so we should not clear it here.
		 */
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
		/*
		 * The waiters bit may be inaccurate.
		 * Only the kernel knows for sure.
		 */
		if (rd_wr == READ_LOCK) {
			if (try_flag)
				error = __lwp_rwlock_tryrdlock(rwlp);
			else
				error = __lwp_rwlock_rdlock(rwlp, tsp);
		} else {
			if (try_flag)
				error = __lwp_rwlock_trywrlock(rwlp);
			else
				error = __lwp_rwlock_wrlock(rwlp, tsp);
		}
	} while (error == EAGAIN || error == EINTR);

	if (!try_flag) {
		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
	}

	return (error);
}
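/*
 * Illustrative sketch (not part of the original source): the
 * shared_rwlock_lock() path above is only taken for rwlocks initialized as
 * process-shared, for example one placed in a shared memory segment.  Like
 * the earlier sketch, this relies on the Solaris threads API from <synch.h>;
 * shared_rwlock_init_example() is a hypothetical caller.
 */
static void
shared_rwlock_init_example(rwlock_t *rwp_in_shm)
{
	/* USYNC_PROCESS routes contended acquisitions through the kernel */
	(void) rwlock_init(rwp_in_shm, USYNC_PROCESS, NULL);
}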
static bool_t
xdrrdma_control(XDR *xdrs, int request, void *info)
{
	int32_t *int32p;
	int len, i;
	uint_t in_flags;
	xrdma_private_t *xdrp = (xrdma_private_t *)(xdrs->x_private);
	rdma_chunkinfo_t *rcip = NULL;
	rdma_wlist_conn_info_t *rwcip = NULL;
	rdma_chunkinfo_lengths_t *rcilp = NULL;
	struct uio *uiop;
	struct clist *rwl = NULL, *first = NULL;
	struct clist *prev = NULL;

	switch (request) {
	case XDR_PEEK:
		/*
		 * Return the next 4 byte unit in the XDR stream.
		 */
		if (xdrs->x_handy < sizeof (int32_t))
			return (FALSE);

		int32p = (int32_t *)info;
		*int32p = (int32_t)ntohl((uint32_t)
		    (*((int32_t *)(xdrp->xp_offp))));

		return (TRUE);

	case XDR_SKIPBYTES:
		/*
		 * Skip the next N bytes in the XDR stream.
		 */
		int32p = (int32_t *)info;
		len = RNDUP((int)(*int32p));
		if ((xdrs->x_handy -= len) < 0)
			return (FALSE);
		xdrp->xp_offp += len;

		return (TRUE);

	case XDR_RDMA_SET_FLAGS:
		/*
		 * Set the flags provided in the *info in xp_flags for rdma
		 * xdr stream control.
		 */
		int32p = (int32_t *)info;
		in_flags = (uint_t)(*int32p);

		xdrp->xp_flags |= in_flags;
		return (TRUE);

	case XDR_RDMA_GET_FLAGS:
		/*
		 * Get the flags provided in xp_flags return through *info
		 */
		int32p = (int32_t *)info;

		*int32p = (int32_t)xdrp->xp_flags;
		return (TRUE);

	case XDR_RDMA_GET_CHUNK_LEN:
		rcilp = (rdma_chunkinfo_lengths_t *)info;
		rcilp->rcil_len = xdrp->xp_reply_chunk_len;
		rcilp->rcil_len_alt = xdrp->xp_reply_chunk_len_alt;

		return (TRUE);

	case XDR_RDMA_ADD_CHUNK:
		/*
		 * Store wlist information
		 */
		rcip = (rdma_chunkinfo_t *)info;

		DTRACE_PROBE2(krpc__i__xdrrdma__control__add__chunk,
		    rci_type_t, rcip->rci_type, uint32, rcip->rci_len);

		switch (rcip->rci_type) {
		case RCI_WRITE_UIO_CHUNK:
			xdrp->xp_reply_chunk_len_alt += rcip->rci_len;

			if ((rcip->rci_len + XDR_RDMA_BUF_OVERHEAD) <
			    xdrp->xp_min_chunk) {
				xdrp->xp_wcl = NULL;
				*(rcip->rci_clpp) = NULL;
				return (TRUE);
			}
			uiop = rcip->rci_a.rci_uiop;

			for (i = 0; i < uiop->uio_iovcnt; i++) {
				rwl = clist_alloc();
				if (first == NULL)
					first = rwl;
				rwl->c_len = uiop->uio_iov[i].iov_len;
				rwl->u.c_daddr = (uint64)(uintptr_t)
				    (uiop->uio_iov[i].iov_base);
				/*
				 * if userspace address, put adspace ptr in
				 * clist.  If not, then do nothing since it's
				 * already set to NULL (from kmem_zalloc)
				 */
				if (uiop->uio_segflg == UIO_USERSPACE) {
					rwl->c_adspc =
					    ttoproc(curthread)->p_as;
				}

				if (prev == NULL)
					prev = rwl;
				else {
					prev->c_next = rwl;
					prev = rwl;
				}
			}

			rwl->c_next = NULL;
			xdrp->xp_wcl = first;
			*(rcip->rci_clpp) = first;

			break;

		case RCI_WRITE_ADDR_CHUNK:
			rwl = clist_alloc();

			rwl->c_len = rcip->rci_len;
			rwl->u.c_daddr3 = rcip->rci_a.rci_addr;
			rwl->c_next = NULL;
			xdrp->xp_reply_chunk_len_alt += rcip->rci_len;

			xdrp->xp_wcl = rwl;
			*(rcip->rci_clpp) = rwl;

			break;

		case RCI_REPLY_CHUNK:
			xdrp->xp_reply_chunk_len += rcip->rci_len;
			break;
		}

		return (TRUE);

	case XDR_RDMA_GET_WLIST:
		*((struct clist **)info) = xdrp->xp_wcl;
		return (TRUE);

	case XDR_RDMA_SET_WLIST:
		xdrp->xp_wcl = (struct clist *)info;
		return (TRUE);

	case XDR_RDMA_GET_RLIST:
		*((struct clist **)info) = xdrp->xp_rcl;
		return (TRUE);

	case XDR_RDMA_GET_WCINFO:
		rwcip = (rdma_wlist_conn_info_t *)info;

		rwcip->rwci_wlist = xdrp->xp_wcl;
		rwcip->rwci_conn = xdrp->xp_conn;

		return (TRUE);

	default:
		return (FALSE);
	}
}
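/*
 * Worked example of the XDR_SKIPBYTES rounding above (illustrative only):
 * XDR encodes data in 4-byte units, so RNDUP(5) == 8 and a request to skip
 * 5 bytes advances xp_offp by 8 and consumes 8 bytes of x_handy.
 */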
/*
 * iscsi_net_sendpdu - send iscsi pdu on socket
 */
static iscsi_status_t
iscsi_net_sendpdu(void *socket, iscsi_hdr_t *ihp, char *data, int flags)
{
	uint32_t	pad;
	uint32_t	crc_hdr;
	uint32_t	crc_data;
	uint32_t	pad_len;
	uint32_t	data_len;
	iovec_t		iovec[ISCSI_MAX_IOVEC];
	int		iovlen = 0;
	size_t		total_len = 0;
	size_t		send_len;
	struct msghdr	msg;

	ASSERT(socket != NULL);
	ASSERT(ihp != NULL);

	/*
	 * Let's send the header first.  'hlength' is in 32-bit
	 * quantities, so we need to multiply by four to get bytes
	 */
	ASSERT(iovlen < ISCSI_MAX_IOVEC);
	iovec[iovlen].iov_base = (void *)ihp;
	iovec[iovlen].iov_len = sizeof (*ihp) + ihp->hlength * 4;
	total_len += sizeof (*ihp) + ihp->hlength * 4;
	iovlen++;

	/* Let's transmit the header digest if we have to. */
	if ((flags & ISCSI_NET_HEADER_DIGEST) != 0) {
		ASSERT(iovlen < ISCSI_MAX_IOVEC);
		/*
		 * Converting the calculated CRC via htonl is not
		 * necessary because iscsi_crc32c calculates
		 * the value as it expects to be written
		 */
		crc_hdr = iscsi_crc32c((char *)ihp,
		    sizeof (iscsi_hdr_t) + ihp->hlength * 4);
		iovec[iovlen].iov_base = (void *)&crc_hdr;
		iovec[iovlen].iov_len = sizeof (crc_hdr);
		total_len += sizeof (crc_hdr);
		iovlen++;
	}

	/* Let's transmit the data if any. */
	data_len = ntoh24(ihp->dlength);
	if (data_len) {
		ASSERT(iovlen < ISCSI_MAX_IOVEC);
		iovec[iovlen].iov_base = (void *)data;
		iovec[iovlen].iov_len = data_len;
		total_len += data_len;
		iovlen++;

		pad_len = ((ISCSI_PAD_WORD_LEN -
		    (data_len & (ISCSI_PAD_WORD_LEN - 1))) &
		    (ISCSI_PAD_WORD_LEN - 1));

		/* Let's transmit the data pad if any. */
		if (pad_len) {
			ASSERT(iovlen < ISCSI_MAX_IOVEC);
			pad = 0;
			iovec[iovlen].iov_base = (void *)&pad;
			iovec[iovlen].iov_len = pad_len;
			total_len += pad_len;
			iovlen++;
		}

		/* Let's transmit the data digest if we have to. */
		if ((flags & ISCSI_NET_DATA_DIGEST) != 0) {
			ASSERT(iovlen < ISCSI_MAX_IOVEC);
			/*
			 * Converting the calculated CRC via htonl is not
			 * necessary because iscsi_crc32c calculates the
			 * value as it expects to be written
			 */
			crc_data = iscsi_crc32c(data, data_len);
			crc_data = iscsi_crc32c_continued(
			    (char *)&pad, pad_len, crc_data);
			iovec[iovlen].iov_base = (void *)&crc_data;
			iovec[iovlen].iov_len = sizeof (crc_data);
			total_len += sizeof (crc_data);
			iovlen++;
		}
	}

	DTRACE_PROBE4(tx, void *, socket, iovec_t *, &iovec[0],
	    int, iovlen, int, total_len);

	/* Initialization of the message header. */
	bzero(&msg, sizeof (msg));
	msg.msg_iov = &iovec[0];
	msg.msg_flags = MSG_WAITALL;
	msg.msg_iovlen = iovlen;

	send_len = iscsi_net->sendmsg(socket, &msg);
	DTRACE_PROBE2(sendmsg, size_t, total_len, size_t, send_len);
	if (total_len != send_len) {
		return (ISCSI_STATUS_TCP_TX_ERROR);
	}
	return (ISCSI_STATUS_SUCCESS);
}
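/*
 * Worked example of the pad_len computation above (illustrative only):
 * with ISCSI_PAD_WORD_LEN == 4 and data_len == 5,
 *	pad_len = (4 - (5 & 3)) & 3 = 3,
 * so the 5 data bytes are padded out to the next 4-byte boundary.  When
 * data_len is already a multiple of 4 the expression yields pad_len == 0.
 */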