static void
splat_thread_work3_common(thread_priv_t *tp)
{
	ulong_t rnd;
	int i, rc = 0;

	/* set a unique value for each key using a random value */
	get_random_bytes((void *)&rnd, 4);
	for (i = 0; i < SPLAT_THREAD_TEST_KEYS; i++)
		tsd_set(tp->tp_keys[i], (void *)(i + rnd));

	/* verify the unique value for each key */
	for (i = 0; i < SPLAT_THREAD_TEST_KEYS; i++)
		if (tsd_get(tp->tp_keys[i]) != (void *)(i + rnd))
			rc = -EINVAL;

	/* set the value to thread_priv_t for use by the destructor */
	for (i = 0; i < SPLAT_THREAD_TEST_KEYS; i++)
		tsd_set(tp->tp_keys[i], (void *)tp);

	spin_lock(&tp->tp_lock);
	if (rc && !tp->tp_rc)
		tp->tp_rc = rc;

	tp->tp_count++;
	wake_up_all(&tp->tp_waitq);
	spin_unlock(&tp->tp_lock);
}
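/*
 * A minimal userland sketch of the same set-then-verify round trip, using
 * the POSIX thread-specific-data API as a stand-in for tsd_set()/tsd_get().
 * This is not SPLAT/SPL code: the key count, the thread count, and every
 * demo_* name below are invented for illustration. Build with -pthread.
 */
#include <assert.h>
#include <pthread.h>
#include <stdint.h>

#define DEMO_TEST_KEYS	32

static pthread_key_t demo_keys[DEMO_TEST_KEYS];

static void *demo_thread_work(void *arg)
{
	uintptr_t base = (uintptr_t)arg;	/* distinct per thread */
	int i;

	/* set a unique value for each key, then read each one back */
	for (i = 0; i < DEMO_TEST_KEYS; i++)
		pthread_setspecific(demo_keys[i], (void *)(base + i));
	for (i = 0; i < DEMO_TEST_KEYS; i++)
		assert(pthread_getspecific(demo_keys[i]) == (void *)(base + i));
	return NULL;
}

int main(void)
{
	pthread_t tids[4];
	int i;

	for (i = 0; i < DEMO_TEST_KEYS; i++)
		pthread_key_create(&demo_keys[i], NULL);	/* no destructor */
	for (i = 0; i < 4; i++)
		pthread_create(&tids[i], NULL, demo_thread_work,
		    (void *)(uintptr_t)((i + 1) * 1000));
	for (i = 0; i < 4; i++)
		pthread_join(tids[i], NULL);
	return 0;
}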
void muidl_supp_alloc_context(unsigned int length)
{
	if(muidl_ctx_key == -1) {
		tsd_key_create(&muidl_ctx_key, &free);
	}
	void *ptr = tsd_get(muidl_ctx_key);
	if(ptr == NULL) {
		if(length < 64) length = 64;
		ptr = malloc(length);
		if(ptr == NULL) {
			printf("%s: can't alloc length=%u bytes!\n",
				__func__, length);
			abort();
		}
		memset(ptr, '\0', length);
		tsd_set(muidl_ctx_key, ptr);
		assert(tsd_get(muidl_ctx_key) == ptr);
	}
}
static struct drop_param *get_ctx(void)
{
	struct drop_param *ptr = tsd_get(ctx_key());
	if(ptr == NULL) {
		ptr = calloc(1, sizeof(*ptr));
		for(int i=0; i < LOG_SIZE; i++) ptr->log[i] = L4_Nilpage;
		ptr->log_top = LOG_SIZE - 1;	/* start at 0 */
		tsd_set(ctx_key(), ptr);
	}
	return ptr;
}
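/*
 * A hedged sketch of the same lazy per-thread context pattern in userland
 * (hypothetical code, not part of muidl): pthread_once() plays the role of
 * the "create the key on first call" check, free() is registered as the
 * destructor so each thread's context is released at thread exit, and the
 * context is allocated zeroed on first use, as above. The demo_* names are
 * invented.
 */
#include <pthread.h>
#include <stdlib.h>

struct demo_ctx {
	char buf[64];
};

static pthread_key_t demo_ctx_key;
static pthread_once_t demo_ctx_once = PTHREAD_ONCE_INIT;

static void demo_ctx_key_init(void)
{
	pthread_key_create(&demo_ctx_key, &free);
}

static struct demo_ctx *demo_get_ctx(void)
{
	pthread_once(&demo_ctx_once, demo_ctx_key_init);

	struct demo_ctx *ctx = pthread_getspecific(demo_ctx_key);
	if (ctx == NULL) {
		ctx = calloc(1, sizeof(*ctx));	/* zeroed, like the memset above */
		if (ctx == NULL)
			abort();
		pthread_setspecific(demo_ctx_key, ctx);
	}
	return ctx;	/* callers may use this from any thread */
}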
/*
 * Add a node to the head of the singly linked list.
 */
static void
rrn_add(rrwlock_t *rrl, void *tag)
{
	rrw_node_t *rn;

	rn = kmem_alloc(sizeof (*rn), KM_PUSHPAGE);
	rn->rn_rrl = rrl;
	rn->rn_next = tsd_get(rrw_tsd_key);
	rn->rn_tag = tag;
	VERIFY(tsd_set(rrw_tsd_key, rn) == 0);
}
/*ARGSUSED*/
void
top_end_async(ufsvfs_t *ufsvfsp, top_t topid, ulong_t size)
{
	ml_unit_t	*ul = ufsvfsp->vfs_log;
	mt_map_t	*mtm = ul->un_logmap;
	threadtrans_t	*tp = tsd_get(topkey);
	int		wakeup_needed = 0;

	ASSERT(tp);
	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
	    top_end_debug(ul, mtm, topid, size));

	mutex_enter(&mtm->mtm_lock);

	if (size > tp->deltas_size) {
		ul->un_resv -= (size - tp->deltas_size);
	}
	if (tp->any_deltas) {
		tp->last_async_tid = mtm->mtm_tid;
	}
	mtm->mtm_ref = 1;

	mtm->mtm_active--;
	if ((mtm->mtm_active == 0) &&
	    (mtm->mtm_closed == (TOP_SYNC|TOP_ASYNC))) {
		wakeup_needed = 1;
	}
	mutex_exit(&mtm->mtm_lock);
	if (wakeup_needed)
		cv_signal(&mtm->mtm_cv_eot);

	/*
	 * Generate a sync op if the log, logmap, or deltamap are heavily used.
	 * Unless we are possibly holding any VM locks, since if we are holding
	 * any VM locks and we issue a top_end_sync(), we could deadlock.
	 */
	if ((mtm->mtm_activesync == 0) &&
	    !(mtm->mtm_closed & TOP_SYNC) &&
	    (deltamap_need_commit(ul->un_deltamap) ||
	    logmap_need_commit(mtm) ||
	    ldl_need_commit(ul)) &&
	    (topid != TOP_GETPAGE)) {
		top_issue_sync(ufsvfsp);
	}

	/*
	 * roll something from the log if the logmap is too full
	 */
	if (logmap_need_roll_async(mtm))
		logmap_forceroll_nowait(mtm);
}
static rrw_node_t *
rrn_find(rrwlock_t *rrl)
{
	rrw_node_t *rn;

	if (refcount_count(&rrl->rr_linked_rcount) == 0)
		return (NULL);

	for (rn = tsd_get(rrw_tsd_key); rn != NULL; rn = rn->rn_next) {
		if (rn->rn_rrl == rrl)
			return (rn);
	}
	return (NULL);
}
/*
 * If a node is found for 'rrl', then remove the node from this
 * thread's list and return TRUE; otherwise return FALSE.
 */
static boolean_t
rrn_find_and_remove(rrwlock_t *rrl, void *tag)
{
	rrw_node_t *rn;
	rrw_node_t *prev = NULL;

	if (refcount_count(&rrl->rr_linked_rcount) == 0)
		return (B_FALSE);

	for (rn = tsd_get(rrw_tsd_key); rn != NULL; rn = rn->rn_next) {
		if (rn->rn_rrl == rrl && rn->rn_tag == tag) {
			if (prev)
				prev->rn_next = rn->rn_next;
			else
				VERIFY(tsd_set(rrw_tsd_key,
				    rn->rn_next) == 0);
			kmem_free(rn, sizeof (*rn));
			return (B_TRUE);
		}
		prev = rn;
	}
	return (B_FALSE);
}
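/*
 * A hedged sketch of the per-thread list idiom used by rrn_add(),
 * rrn_find(), and rrn_find_and_remove() above: the list head lives in a
 * TSD slot, so each thread sees only its own hold records. This is
 * hypothetical code, not the ZFS rrwlock implementation; it uses POSIX TSD
 * and invented hold_* names, and assumes hold_list_key was created
 * elsewhere with pthread_key_create() and no destructor.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

struct hold_node {
	const void *lock;		/* which lock this thread holds */
	void *tag;
	struct hold_node *next;
};

static pthread_key_t hold_list_key;

static void hold_add(const void *lock, void *tag)
{
	struct hold_node *n = malloc(sizeof (*n));

	if (n == NULL)
		abort();
	n->lock = lock;
	n->tag = tag;
	n->next = pthread_getspecific(hold_list_key);	/* old head */
	pthread_setspecific(hold_list_key, n);		/* new head */
}

static bool hold_find_and_remove(const void *lock, void *tag)
{
	struct hold_node *prev = NULL;
	struct hold_node *n;

	for (n = pthread_getspecific(hold_list_key); n != NULL;
	    prev = n, n = n->next) {
		if (n->lock == lock && n->tag == tag) {
			if (prev != NULL)
				prev->next = n->next;	/* unlink mid-list */
			else
				pthread_setspecific(hold_list_key, n->next);
			free(n);
			return true;
		}
	}
	return false;
}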
/*
 * declare a delta
 */
void
top_delta(
	ufsvfs_t *ufsvfsp,
	offset_t mof,
	off_t nb,
	delta_t dtyp,
	int (*func)(),
	ulong_t arg)
{
	ml_unit_t	*ul = ufsvfsp->vfs_log;
	threadtrans_t	*tp = tsd_get(topkey);

	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
	ASSERT(nb);
	ASSERT(((ul->un_debug & (MT_TRANSACT|MT_MATAMAP)) == 0) ||
	    top_delta_debug(ul, mof, nb, dtyp));

	deltamap_add(ul->un_deltamap, mof, nb, dtyp, func, arg, tp);

	ul->un_logmap->mtm_ref = 1; /* for roll thread's heuristic */
	if (tp) {
		tp->any_deltas = 1;
	}
}
void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
    znode_t *zp, offset_t off, ssize_t resid, int ioflag,
    zil_callback_t callback, void *callback_data)
{
	itx_wr_state_t write_state;
	boolean_t slogging;
	uintptr_t fsync_cnt;
	ssize_t immediate_write_sz;

	if (zil_replaying(zilog, tx) || zp->z_unlinked) {
		if (callback != NULL)
			callback(callback_data);
		return;
	}

	immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
	    ? 0 : (ssize_t)zfs_immediate_write_sz;

	slogging = spa_has_slogs(zilog->zl_spa) &&
	    (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
	if (resid > immediate_write_sz && !slogging && resid <= zp->z_blksz)
		write_state = WR_INDIRECT;
	else if (ioflag & (FSYNC | FDSYNC))
		write_state = WR_COPIED;
	else
		write_state = WR_NEED_COPY;

	if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) {
		(void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
	}

	while (resid) {
		itx_t *itx;
		lr_write_t *lr;
		ssize_t len;

		/*
		 * If the write would overflow the largest block then split it.
		 */
		if (write_state != WR_INDIRECT && resid > ZIL_MAX_LOG_DATA)
			len = SPA_MAXBLOCKSIZE >> 1;
		else
			len = resid;

		itx = zil_itx_create(txtype, sizeof (*lr) +
		    (write_state == WR_COPIED ? len : 0));
		lr = (lr_write_t *)&itx->itx_lr;
		if (write_state == WR_COPIED && dmu_read(ZTOZSB(zp)->z_os,
		    zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
			zil_itx_destroy(itx);
			itx = zil_itx_create(txtype, sizeof (*lr));
			lr = (lr_write_t *)&itx->itx_lr;
			write_state = WR_NEED_COPY;
		}

		itx->itx_wr_state = write_state;
		if (write_state == WR_NEED_COPY)
			itx->itx_sod += len;
		lr->lr_foid = zp->z_id;
		lr->lr_offset = off;
		lr->lr_length = len;
		lr->lr_blkoff = 0;
		BP_ZERO(&lr->lr_blkptr);

		itx->itx_private = ZTOZSB(zp);

		if (!(ioflag & (FSYNC | FDSYNC)) && (zp->z_sync_cnt == 0) &&
		    (fsync_cnt == 0))
			itx->itx_sync = B_FALSE;

		itx->itx_callback = callback;
		itx->itx_callback_data = callback_data;
		zil_itx_assign(zilog, itx, tx);

		off += len;
		resid -= len;
	}
}
void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
    znode_t *zp, offset_t off, ssize_t resid, int ioflag)
{
	uint32_t blocksize = zp->z_blksz;
	itx_wr_state_t write_state;
	uintptr_t fsync_cnt;

	if (zil_replaying(zilog, tx) || zp->z_unlinked)
		return;

	if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
		write_state = WR_INDIRECT;
	else if (!spa_has_slogs(zilog->zl_spa) &&
	    resid >= zfs_immediate_write_sz)
		write_state = WR_INDIRECT;
	else if (ioflag & (FSYNC | FDSYNC))
		write_state = WR_COPIED;
	else
		write_state = WR_NEED_COPY;

	if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) {
		(void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
	}

	while (resid) {
		itx_t *itx;
		lr_write_t *lr;
		itx_wr_state_t wr_state = write_state;
		ssize_t len = resid;

		if (wr_state == WR_COPIED && resid > ZIL_MAX_COPIED_DATA)
			wr_state = WR_NEED_COPY;
		else if (wr_state == WR_INDIRECT)
			len = MIN(blocksize - P2PHASE(off, blocksize), resid);

		itx = zil_itx_create(txtype, sizeof (*lr) +
		    (wr_state == WR_COPIED ? len : 0));
		lr = (lr_write_t *)&itx->itx_lr;
		if (wr_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os,
		    zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
			zil_itx_destroy(itx);
			itx = zil_itx_create(txtype, sizeof (*lr));
			lr = (lr_write_t *)&itx->itx_lr;
			wr_state = WR_NEED_COPY;
		}

		itx->itx_wr_state = wr_state;
		lr->lr_foid = zp->z_id;
		lr->lr_offset = off;
		lr->lr_length = len;
		lr->lr_blkoff = 0;
		BP_ZERO(&lr->lr_blkptr);

		itx->itx_private = zp->z_zfsvfs;

		if (!(ioflag & (FSYNC | FDSYNC)) && (zp->z_sync_cnt == 0) &&
		    (fsync_cnt == 0))
			itx->itx_sync = B_FALSE;

		zil_itx_assign(zilog, itx, tx);

		off += len;
		resid -= len;
	}
}
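/*
 * The zfs_fsyncer_key usage in the zfs_log_write() variants is a per-thread
 * "sync credit" handoff: another code path (zfs_fsync() in these codebases)
 * stores a small count in the TSD slot, and each logged write consumes one
 * credit and is forced synchronous while credits remain. Below is a hedged,
 * hypothetical userland sketch of that pattern with POSIX TSD; the
 * sync_credit_key and both function names are invented, and the key is
 * assumed to be created once elsewhere with no destructor. Storing the count
 * directly in the TSD pointer avoids an allocation, just as the kernel code
 * does.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

static pthread_key_t sync_credit_key;

/* Called after an fsync-like operation: grant the next n writes sync status. */
static void grant_sync_credits(unsigned int n)
{
	pthread_setspecific(sync_credit_key, (void *)(uintptr_t)n);
}

/* Called per write: returns true if this write should be logged synchronously. */
static bool consume_sync_credit(void)
{
	uintptr_t n = (uintptr_t)pthread_getspecific(sync_credit_key);

	if (n == 0)
		return false;
	pthread_setspecific(sync_credit_key, (void *)(n - 1));
	return true;
}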
int
top_begin_async(ufsvfs_t *ufsvfsp, top_t topid, ulong_t size, int tryasync)
{
	ml_unit_t	*ul = ufsvfsp->vfs_log;
	mt_map_t	*mtm = ul->un_logmap;
	threadtrans_t	*tp;

	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);

	tp = tsd_get(topkey);
	if (tp == NULL) {
		tp = kmem_zalloc(sizeof (threadtrans_t), KM_SLEEP);
		(void) tsd_set(topkey, tp);
	}
	tp->deltas_size = 0;
	tp->any_deltas = 0;

	mutex_enter(&mtm->mtm_lock);
retry:
	mtm->mtm_ref = 1;
	/*
	 * current transaction closed to async ops; try for next transaction
	 */
	if ((mtm->mtm_closed & TOP_ASYNC) && !panicstr) {
		if (tryasync) {
			mutex_exit(&mtm->mtm_lock);
			tryfail_cnt++;
			return (EWOULDBLOCK);
		}
		cv_wait(&mtm->mtm_cv_next, &mtm->mtm_lock);
		goto retry;
	}

	/*
	 * if the current transaction is full; try the next one
	 */
	if (((size + ul->un_resv + ul->un_resv_wantin) > ul->un_maxresv) &&
	    !panicstr) {
		/*
		 * log is overreserved and no one will unresv the space
		 * so generate empty sync op to unresv the space
		 * We need TOP_SYNC_FORCED because we want to know when
		 * a top_end_sync is completed.
		 * mtm_taskq_sync_count is needed because we want to keep track
		 * of the pending top_issue_sync dispatches so that during
		 * forced umount we can wait for these to complete.
		 * mtm_taskq_sync_count is decremented in top_issue_sync and
		 * can remain set even after top_end_sync completes.
		 * We have a window between the clearing of TOP_SYNC_FORCED
		 * flag and the decrementing of mtm_taskq_sync_count.
		 * If in this window new async transactions start consuming
		 * log space, the log can get overreserved.
		 * Subsequently a new async transaction would fail to generate
		 * an empty sync transaction via the taskq, since it finds
		 * the mtm_taskq_sync_count set. This can cause a hang.
		 * Hence we do not test for mtm_taskq_sync_count being zero.
		 * Instead, the TOP_SYNC_FORCED flag is tested here.
		 */
		if ((mtm->mtm_activesync == 0) &&
		    (!(mtm->mtm_closed & TOP_SYNC_FORCED))) {
			/*
			 * Set flag to stop multiple forced empty
			 * sync transactions. Increment mtm_taskq_sync_count.
			 */
			mtm->mtm_closed |= TOP_SYNC_FORCED;
			mtm->mtm_taskq_sync_count++;
			mutex_exit(&mtm->mtm_lock);
			(void) taskq_dispatch(system_taskq,
			    top_issue_sync, ufsvfsp, TQ_SLEEP);
			if (tryasync) {
				tryfail_cnt++;
				return (EWOULDBLOCK);
			}
			mutex_enter(&mtm->mtm_lock);
			goto retry;
		}
		if (tryasync) {
			mutex_exit(&mtm->mtm_lock);
			tryfail_cnt++;
			return (EWOULDBLOCK);
		}
		cv_wait(&mtm->mtm_cv_next, &mtm->mtm_lock);
		goto retry;
	}
	/*
	 * we are in the current transaction
	 */
	mtm->mtm_active++;
	ul->un_resv += size;
	ASSERT(mtm->mtm_active > 0);
	mutex_exit(&mtm->mtm_lock);

	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
	    top_begin_debug(ul, topid, size));

	return (0);
}
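/*
 * A hedged sketch (hypothetical, not UFS/illumos code) of the begin/end
 * handoff that top_begin_async(), top_delta(), and top_end_async() perform
 * through topkey: begin() lazily allocates a per-thread record and resets
 * it, and later helpers fetch the same record through TSD instead of having
 * it passed down the call chain. The txn_* names and fields are invented;
 * txn_key is assumed to be created once with free() as its destructor.
 */
#include <pthread.h>
#include <stdlib.h>

struct txn_state {
	unsigned long deltas_size;	/* scratch state for the current transaction */
	int any_deltas;
};

static pthread_key_t txn_key;

static struct txn_state *txn_begin(void)
{
	struct txn_state *ts = pthread_getspecific(txn_key);

	if (ts == NULL) {	/* first transaction on this thread */
		ts = calloc(1, sizeof (*ts));
		if (ts == NULL)
			abort();
		pthread_setspecific(txn_key, ts);
	}
	ts->deltas_size = 0;	/* reset per-transaction scratch state */
	ts->any_deltas = 0;
	return ts;
}

static void txn_note_delta(unsigned long nbytes)
{
	struct txn_state *ts = pthread_getspecific(txn_key);

	if (ts != NULL) {	/* same record, no pointer threaded through */
		ts->any_deltas = 1;
		ts->deltas_size += nbytes;
	}
}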
/*ARGSUSED*/
void
top_begin_sync(ufsvfs_t *ufsvfsp, top_t topid, ulong_t size, int *error)
{
	ml_unit_t	*ul = ufsvfsp->vfs_log;
	mt_map_t	*mtm = ul->un_logmap;
	threadtrans_t	*tp;
	ushort_t	seq;

	ASSERT(ufsvfsp->vfs_dev == ul->un_dev);
	ASSERT(error != NULL);
	ASSERT(*error == 0);

	mutex_enter(&mtm->mtm_lock);
	if (topid == TOP_FSYNC) {
		/*
		 * Error the fsync immediately if this is an nfs thread
		 * and its last transaction has already been committed.
		 * The only transactions outstanding are those
		 * where no commit has even started
		 * (last_async_tid == mtm->mtm_tid)
		 * or those where a commit is in progress
		 * (last_async_tid == mtm->mtm_committid)
		 */
		if (curthread->t_flag & T_DONTPEND) {
			tp = tsd_get(topkey);
			if (tp && (tp->last_async_tid != mtm->mtm_tid) &&
			    (tp->last_async_tid != mtm->mtm_committid)) {
				mutex_exit(&mtm->mtm_lock);
				*error = 1;
				return;
			}
		}

		/*
		 * If there's already other synchronous transactions
		 * and we haven't allowed async ones to start yet
		 * then just wait for the commit to complete.
		 */
		if (((mtm->mtm_closed & (TOP_SYNC | TOP_ASYNC)) ==
		    (TOP_SYNC | TOP_ASYNC)) || mtm->mtm_activesync) {
			seq = mtm->mtm_seq;
			do {
				cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
			} while (seq == mtm->mtm_seq);
			mutex_exit(&mtm->mtm_lock);
			*error = 1;
			return;
		}
		if (mtm->mtm_closed & TOP_SYNC) {
			/*
			 * We know we're in the window where a thread is
			 * committing a transaction in top_end_sync() and
			 * has allowed async threads to start but hasn't
			 * got the completion on the commit write to
			 * allow sync threads to start.
			 * So wait for that commit completion then retest
			 * for the quick nfs check and if that fails
			 * go on to start a transaction
			 */
			seq = mtm->mtm_seq;
			do {
				cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
			} while (seq == mtm->mtm_seq);

			/* tp is set above if T_DONTPEND */
			if ((curthread->t_flag & T_DONTPEND) && tp &&
			    (tp->last_async_tid != mtm->mtm_tid) &&
			    (tp->last_async_tid != mtm->mtm_committid)) {
				mutex_exit(&mtm->mtm_lock);
				*error = 1;
				return;
			}
		}
	}
retry:
	mtm->mtm_ref = 1;
	/*
	 * current transaction closed to sync ops; try for next transaction
	 */
	if ((mtm->mtm_closed & TOP_SYNC) && !panicstr) {
		ulong_t resv;

		/*
		 * We know a commit is in progress, if we are trying to
		 * commit and we haven't allowed async ones to start yet,
		 * then just wait for the commit completion
		 */
		if ((size == TOP_COMMIT_SIZE) &&
		    (((mtm->mtm_closed & (TOP_SYNC | TOP_ASYNC)) ==
		    (TOP_SYNC | TOP_ASYNC)) || (mtm->mtm_activesync))) {
			seq = mtm->mtm_seq;
			do {
				cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
			} while (seq == mtm->mtm_seq);
			mutex_exit(&mtm->mtm_lock);
			*error = 1;
			return;
		}

		/*
		 * next transaction is full; try for next transaction
		 */
		resv = size + ul->un_resv_wantin + ul->un_resv;
		if (resv > ul->un_maxresv) {
			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
			goto retry;
		}

		/*
		 * we are in the next transaction; wait for it to start
		 */
		mtm->mtm_wantin++;
		ul->un_resv_wantin += size;

		/*
		 * The corresponding cv_broadcast wakes up
		 * all threads that have been validated to go into
		 * the next transaction. However, because spurious
		 * cv_wait wakeups are possible we use a sequence
		 * number to check that the commit and cv_broadcast
		 * has really occurred. We couldn't use mtm_tid
		 * because on error that doesn't get incremented.
		 */
		seq = mtm->mtm_seq;
		do {
			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
		} while (seq == mtm->mtm_seq);
	} else {
		/*
		 * if the current transaction is full; try the next one
		 */
		if (size && (ul->un_resv &&
		    ((size + ul->un_resv) > ul->un_maxresv)) && !panicstr) {
			/*
			 * log is over reserved and no one will unresv the space
			 * so generate empty sync op to unresv the space
			 */
			if (mtm->mtm_activesync == 0) {
				mutex_exit(&mtm->mtm_lock);
				top_issue_sync(ufsvfsp);
				mutex_enter(&mtm->mtm_lock);
				goto retry;
			}
			cv_wait(&mtm->mtm_cv_commit, &mtm->mtm_lock);
			goto retry;
		}
		/*
		 * we are in the current transaction
		 */
		mtm->mtm_active++;
		mtm->mtm_activesync++;
		ul->un_resv += size;
	}

	ASSERT(mtm->mtm_active > 0);
	ASSERT(mtm->mtm_activesync > 0);
	mutex_exit(&mtm->mtm_lock);

	ASSERT(((ul->un_debug & MT_TRANSACT) == 0) ||
	    top_begin_debug(ul, topid, size));
}
void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
    znode_t *zp, offset_t off, ssize_t resid, int ioflag)
{
	itx_wr_state_t write_state;
	boolean_t slogging;
	uintptr_t fsync_cnt = 0;	/* stays 0 where the TSD lookup below is compiled out */

	if (zilog == NULL || zp->z_unlinked)
		return;

	/*
	 * Writes are handled in three different ways:
	 *
	 * WR_INDIRECT:
	 *    If the write is greater than zfs_immediate_write_sz and there are
	 *    no separate logs in this pool then later *if* we need to log the
	 *    write then dmu_sync() is used to immediately write the block and
	 *    its block pointer is put in the log record.
	 * WR_COPIED:
	 *    If we know we'll immediately be committing the
	 *    transaction (FDSYNC (O_DSYNC)), then we allocate a larger
	 *    log record here for the data and copy the data in.
	 * WR_NEED_COPY:
	 *    Otherwise we don't allocate a buffer, and *if* we need to
	 *    flush the write later then a buffer is allocated and
	 *    we retrieve the data using the dmu.
	 */
	slogging = spa_has_slogs(zilog->zl_spa);
	if (resid > zfs_immediate_write_sz && !slogging)
		write_state = WR_INDIRECT;
	else if (ioflag & FDSYNC)
		write_state = WR_COPIED;
	else
		write_state = WR_NEED_COPY;

#ifndef __APPLE__
	if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) {
		(void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
	}
#endif

	while (resid) {
		itx_t *itx;
		lr_write_t *lr;
		ssize_t len;

		/*
		 * If there are slogs and the write would overflow the largest
		 * block, then because we don't want to use the main pool
		 * to dmu_sync, we have to split the write.
		 */
		if (slogging && resid > ZIL_MAX_LOG_DATA)
			len = SPA_MAXBLOCKSIZE >> 1;
		else
			len = resid;

		itx = zil_itx_create(txtype, sizeof (*lr) +
		    (write_state == WR_COPIED ? len : 0));
		lr = (lr_write_t *)&itx->itx_lr;
		if (write_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os,
		    zp->z_id, off, len, lr + 1) != 0) {
			kmem_free(itx, offsetof(itx_t, itx_lr) +
			    itx->itx_lr.lrc_reclen);
			itx = zil_itx_create(txtype, sizeof (*lr));
			lr = (lr_write_t *)&itx->itx_lr;
			write_state = WR_NEED_COPY;
		}

		itx->itx_wr_state = write_state;
		lr->lr_foid = zp->z_id;
		lr->lr_offset = off;
		lr->lr_length = len;
		lr->lr_blkoff = 0;
		BP_ZERO(&lr->lr_blkptr);

		itx->itx_private = zp->z_zfsvfs;

		if ((zp->z_sync_cnt != 0) || (fsync_cnt != 0))
			itx->itx_sync = B_TRUE;
		else
			itx->itx_sync = B_FALSE;

		zp->z_last_itx = zil_itx_assign(zilog, itx, tx);

		off += len;
		resid -= len;
	}
}
void *muidl_supp_get_context(void)
{
	if(unlikely(muidl_ctx_key < 0)) return NULL;
	return tsd_get(muidl_ctx_key);
}
void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
    znode_t *zp, offset_t off, ssize_t resid, int ioflag)
{
	itx_wr_state_t write_state;
	boolean_t slogging;
	uintptr_t fsync_cnt;

	if (zilog == NULL || zp->z_unlinked)
		return;

	ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */

	slogging = spa_has_slogs(zilog->zl_spa);
	if (resid > zfs_immediate_write_sz && !slogging &&
	    resid <= zp->z_blksz)
		write_state = WR_INDIRECT;
	else if (ioflag & (FSYNC | FDSYNC))
		write_state = WR_COPIED;
	else
		write_state = WR_NEED_COPY;

	if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) {
		(void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
	}

	while (resid) {
		itx_t *itx;
		lr_write_t *lr;
		ssize_t len;

		/*
		 * If the write would overflow the largest block then split it.
		 */
		if (write_state != WR_INDIRECT && resid > ZIL_MAX_LOG_DATA)
			len = SPA_MAXBLOCKSIZE >> 1;
		else
			len = resid;

		itx = zil_itx_create(txtype, sizeof (*lr) +
		    (write_state == WR_COPIED ? len : 0));
		lr = (lr_write_t *)&itx->itx_lr;
		if (write_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os,
		    zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
			kmem_free(itx, offsetof(itx_t, itx_lr) +
			    itx->itx_lr.lrc_reclen);
			itx = zil_itx_create(txtype, sizeof (*lr));
			lr = (lr_write_t *)&itx->itx_lr;
			write_state = WR_NEED_COPY;
		}

		itx->itx_wr_state = write_state;
		if (write_state == WR_NEED_COPY)
			itx->itx_sod += len;
		lr->lr_foid = zp->z_id;
		lr->lr_offset = off;
		lr->lr_length = len;
		lr->lr_blkoff = 0;
		BP_ZERO(&lr->lr_blkptr);

		itx->itx_private = zp->z_zfsvfs;

		if ((zp->z_sync_cnt != 0) || (fsync_cnt != 0) ||
		    (ioflag & (FSYNC | FDSYNC)))
			itx->itx_sync = B_TRUE;
		else
			itx->itx_sync = B_FALSE;

		zp->z_last_itx = zil_itx_assign(zilog, itx, tx);

		off += len;
		resid -= len;
	}
}