static void
trim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
{
	trim_map_t *tm = vd->vdev_trimmap;
	trim_seg_t *ts;
	uint64_t start, size, txglimit;

	ASSERT(vd->vdev_ops->vdev_op_leaf);

	if (tm == NULL)
		return;

	txglimit = MIN(spa->spa_syncing_txg, spa_freeze_txg(spa)) -
	    trim_txg_limit;

	mutex_enter(&tm->tm_lock);
	/*
	 * Loop until we send all frees up to the txglimit.
	 */
	while ((ts = trim_map_first(tm, txglimit)) != NULL) {
		list_remove(&tm->tm_head, ts);
		avl_remove(&tm->tm_queued_frees, ts);
		avl_add(&tm->tm_inflight_frees, ts);
		zio_nowait(zio_trim(zio, spa, vd, ts->ts_start,
		    ts->ts_end - ts->ts_start));
	}
	mutex_exit(&tm->tm_lock);
}
static void
trim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
{
	trim_map_t *tm = vd->vdev_trimmap;
	trim_seg_t *ts;
	uint64_t size, offset, txgtarget, txgsafe;
	int64_t hard, soft;
	hrtime_t timelimit;

	ASSERT(vd->vdev_ops->vdev_op_leaf);

	if (tm == NULL)
		return;

	timelimit = gethrtime() - (hrtime_t)trim_timeout * NANOSEC;
	if (vd->vdev_isl2cache) {
		txgsafe = UINT64_MAX;
		txgtarget = UINT64_MAX;
	} else {
		txgsafe = MIN(spa_last_synced_txg(spa), spa_freeze_txg(spa));
		if (txgsafe > trim_txg_delay)
			txgtarget = txgsafe - trim_txg_delay;
		else
			txgtarget = 0;
	}

	mutex_enter(&tm->tm_lock);
	hard = 0;
	if (tm->tm_pending > trim_vdev_max_pending)
		hard = (tm->tm_pending - trim_vdev_max_pending) / 4;
	soft = P2ROUNDUP(hard + tm->tm_pending / trim_timeout + 1, 64);
	/* Loop until we have sent all outstanding frees */
	while (soft > 0 && (ts = trim_map_first(tm, txgtarget, txgsafe,
	    timelimit, hard > 0)) != NULL) {
		TRIM_MAP_REM(tm, ts);
		avl_remove(&tm->tm_queued_frees, ts);
		avl_add(&tm->tm_inflight_frees, ts);
		size = ts->ts_end - ts->ts_start;
		offset = ts->ts_start;
		/*
		 * We drop the lock while we call zio_nowait as the IO
		 * scheduler can result in a different IO being run e.g.
		 * a write which would result in a recursive lock.
		 */
		mutex_exit(&tm->tm_lock);
		zio_nowait(zio_trim(zio, spa, vd, offset, size));
		soft -= TRIM_MAP_SEGS(size);
		hard -= TRIM_MAP_SEGS(size);
		mutex_enter(&tm->tm_lock);
	}
	mutex_exit(&tm->tm_lock);
}
static void
trim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
{
	trim_map_t *tm = vd->vdev_trimmap;
	trim_seg_t *ts;
	uint64_t size, txgtarget, txgsafe;
	hrtime_t timelimit;

	ASSERT(vd->vdev_ops->vdev_op_leaf);

	if (tm == NULL)
		return;

	timelimit = gethrtime() - trim_timeout * NANOSEC;
	if (vd->vdev_isl2cache) {
		txgsafe = UINT64_MAX;
		txgtarget = UINT64_MAX;
	} else {
		txgsafe = MIN(spa_last_synced_txg(spa), spa_freeze_txg(spa));
		if (txgsafe > trim_txg_delay)
			txgtarget = txgsafe - trim_txg_delay;
		else
			txgtarget = 0;
	}

	mutex_enter(&tm->tm_lock);
	/* Loop until we have sent all outstanding frees */
	while ((ts = trim_map_first(tm, txgtarget, txgsafe,
	    timelimit)) != NULL) {
		list_remove(&tm->tm_head, ts);
		avl_remove(&tm->tm_queued_frees, ts);
		avl_add(&tm->tm_inflight_frees, ts);
		size = ts->ts_end - ts->ts_start;
		zio_nowait(zio_trim(zio, spa, vd, ts->ts_start, size));
		TRIM_MAP_SDEC(tm, size);
		TRIM_MAP_QDEC(tm);
	}
	mutex_exit(&tm->tm_lock);
}
/*
 * Free up all in-memory intent log transactions that have now been synced.
 */
static void
zil_itx_clean(zilog_t *zilog)
{
	uint64_t synced_txg = spa_last_synced_txg(zilog->zl_spa);
	uint64_t freeze_txg = spa_freeze_txg(zilog->zl_spa);
	list_t clean_list;
	itx_t *itx;

	list_create(&clean_list, sizeof (itx_t), offsetof(itx_t, itx_node));

	mutex_enter(&zilog->zl_lock);
	/* wait for a log writer to finish walking list */
	while (zilog->zl_writer) {
		cv_wait(&zilog->zl_cv_writer, &zilog->zl_lock);
	}

	/*
	 * Move the sync'd log transactions to a separate list so we can call
	 * kmem_free without holding the zl_lock.
	 *
	 * There is no need to set zl_writer as we don't drop zl_lock here.
	 */
	while ((itx = list_head(&zilog->zl_itx_list)) != NULL &&
	    itx->itx_lr.lrc_txg <= MIN(synced_txg, freeze_txg)) {
		list_remove(&zilog->zl_itx_list, itx);
		zilog->zl_itx_list_sz -= itx->itx_sod;
		list_insert_tail(&clean_list, itx);
	}

	cv_broadcast(&zilog->zl_cv_writer);
	mutex_exit(&zilog->zl_lock);

	/* destroy sync'd log transactions */
	while ((itx = list_head(&clean_list)) != NULL) {
		list_remove(&clean_list, itx);
		kmem_free(itx, offsetof(itx_t, itx_lr) +
		    itx->itx_lr.lrc_reclen);
	}
	list_destroy(&clean_list);
}
static lwb_t *
zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
{
	lr_t *lrc = &itx->itx_lr; /* common log record */
	lr_write_t *lr = (lr_write_t *)lrc;
	uint64_t txg = lrc->lrc_txg;
	uint64_t reclen = lrc->lrc_reclen;
	uint64_t dlen;

	if (lwb == NULL)
		return (NULL);
	ASSERT(lwb->lwb_buf != NULL);

	if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY)
		dlen = P2ROUNDUP_TYPED(
		    lr->lr_length, sizeof (uint64_t), uint64_t);
	else
		dlen = 0;

	zilog->zl_cur_used += (reclen + dlen);

	zil_lwb_write_init(zilog, lwb);

	/*
	 * If this record won't fit in the current log block, start a new one.
	 */
	if (lwb->lwb_nused + reclen + dlen > ZIL_BLK_DATA_SZ(lwb)) {
		lwb = zil_lwb_write_start(zilog, lwb);
		if (lwb == NULL)
			return (NULL);
		zil_lwb_write_init(zilog, lwb);
		ASSERT(lwb->lwb_nused == 0);
		if (reclen + dlen > ZIL_BLK_DATA_SZ(lwb)) {
			txg_wait_synced(zilog->zl_dmu_pool, txg);
			return (lwb);
		}
	}

	/*
	 * Update lrc_seq to be the log record sequence number (see zil.h),
	 * then copy the record to the log buffer.
	 */
	lrc->lrc_seq = ++zilog->zl_lr_seq; /* we are single threaded */
	bcopy(lrc, lwb->lwb_buf + lwb->lwb_nused, reclen);

	/*
	 * If it's a write, fetch the data or get its blkptr as appropriate.
	 */
	if (lrc->lrc_txtype == TX_WRITE) {
		if (txg > spa_freeze_txg(zilog->zl_spa))
			txg_wait_synced(zilog->zl_dmu_pool, txg);
		if (itx->itx_wr_state != WR_COPIED) {
			char *dbuf;
			int error;

			/* alignment is guaranteed */
			lr = (lr_write_t *)(lwb->lwb_buf + lwb->lwb_nused);
			if (dlen) {
				ASSERT(itx->itx_wr_state == WR_NEED_COPY);
				dbuf = lwb->lwb_buf + lwb->lwb_nused + reclen;
				lr->lr_common.lrc_reclen += dlen;
			} else {
				ASSERT(itx->itx_wr_state == WR_INDIRECT);
				dbuf = NULL;
			}
			error = zilog->zl_get_data(
			    itx->itx_private, lr, dbuf, lwb->lwb_zio);
			if (error == EIO) {
				txg_wait_synced(zilog->zl_dmu_pool, txg);
				return (lwb);
			}
			if (error) {
				ASSERT(error == ENOENT || error == EEXIST ||
				    error == EALREADY);
				return (lwb);
			}
		}
	}

	lwb->lwb_nused += reclen + dlen;
	lwb->lwb_max_txg = MAX(lwb->lwb_max_txg, txg);
	ASSERT3U(lwb->lwb_nused, <=, ZIL_BLK_DATA_SZ(lwb));
	ASSERT3U(P2PHASE(lwb->lwb_nused, sizeof (uint64_t)), ==, 0);

	return (lwb);
}
static void
zil_commit_writer(zilog_t *zilog, uint64_t seq, uint64_t foid)
{
	uint64_t txg;
	uint64_t commit_seq = 0;
	itx_t *itx, *itx_next = (itx_t *)-1;
	lwb_t *lwb;
	spa_t *spa;

	zilog->zl_writer = B_TRUE;
	ASSERT(zilog->zl_root_zio == NULL);
	spa = zilog->zl_spa;

	if (zilog->zl_suspend) {
		lwb = NULL;
	} else {
		lwb = list_tail(&zilog->zl_lwb_list);
		if (lwb == NULL) {
			/*
			 * Return if there's nothing to flush before we
			 * dirty the fs by calling zil_create()
			 */
			if (list_is_empty(&zilog->zl_itx_list)) {
				zilog->zl_writer = B_FALSE;
				return;
			}
			mutex_exit(&zilog->zl_lock);
			zil_create(zilog);
			mutex_enter(&zilog->zl_lock);
			lwb = list_tail(&zilog->zl_lwb_list);
		}
	}

	/* Loop through in-memory log transactions filling log blocks. */
	DTRACE_PROBE1(zil__cw1, zilog_t *, zilog);
	for (;;) {
		/*
		 * Find the next itx to push:
		 * Push all transactions related to specified foid and all
		 * other transactions except TX_WRITE, TX_TRUNCATE,
		 * TX_SETATTR and TX_ACL for all other files.
		 */
		if (itx_next != (itx_t *)-1)
			itx = itx_next;
		else
			itx = list_head(&zilog->zl_itx_list);
		for (; itx != NULL; itx = list_next(&zilog->zl_itx_list, itx)) {
			if (foid == 0) /* push all foids? */
				break;
			if (itx->itx_sync) /* push all O_[D]SYNC */
				break;
			switch (itx->itx_lr.lrc_txtype) {
			case TX_SETATTR:
			case TX_WRITE:
			case TX_TRUNCATE:
			case TX_ACL:
				/* lr_foid is same offset for these records */
				if (((lr_write_t *)&itx->itx_lr)->lr_foid
				    != foid) {
					continue; /* skip this record */
				}
			}
			break;
		}
		if (itx == NULL)
			break;

		if ((itx->itx_lr.lrc_seq > seq) &&
		    ((lwb == NULL) || (lwb->lwb_nused == 0) ||
		    (lwb->lwb_nused + itx->itx_sod > ZIL_BLK_DATA_SZ(lwb)))) {
			break;
		}

		/*
		 * Save the next pointer.  Even though we soon drop
		 * zl_lock all threads that may change the list
		 * (another writer or zil_itx_clean) can't do so until
		 * they have zl_writer.
		 */
		itx_next = list_next(&zilog->zl_itx_list, itx);
		list_remove(&zilog->zl_itx_list, itx);
		zilog->zl_itx_list_sz -= itx->itx_sod;
		mutex_exit(&zilog->zl_lock);
		txg = itx->itx_lr.lrc_txg;
		ASSERT(txg);

		if (txg > spa_last_synced_txg(spa) ||
		    txg > spa_freeze_txg(spa))
			lwb = zil_lwb_commit(zilog, itx, lwb);
		kmem_free(itx, offsetof(itx_t, itx_lr) +
		    itx->itx_lr.lrc_reclen);
		mutex_enter(&zilog->zl_lock);
	}
	DTRACE_PROBE1(zil__cw2, zilog_t *, zilog);
	/* determine commit sequence number */
	itx = list_head(&zilog->zl_itx_list);
	if (itx)
		commit_seq = itx->itx_lr.lrc_seq;
	else
		commit_seq = zilog->zl_itx_seq;
	mutex_exit(&zilog->zl_lock);

	/* write the last block out */
	if (lwb != NULL && lwb->lwb_zio != NULL)
		lwb = zil_lwb_write_start(zilog, lwb);

	zilog->zl_prev_used = zilog->zl_cur_used;
	zilog->zl_cur_used = 0;

	/*
	 * Wait if necessary for the log blocks to be on stable storage.
	 */
	if (zilog->zl_root_zio) {
		DTRACE_PROBE1(zil__cw3, zilog_t *, zilog);
		(void) zio_wait(zilog->zl_root_zio);
		zilog->zl_root_zio = NULL;
		DTRACE_PROBE1(zil__cw4, zilog_t *, zilog);
		zil_flush_vdevs(zilog);
	}

	if (zilog->zl_log_error || lwb == NULL) {
		zilog->zl_log_error = 0;
		txg_wait_synced(zilog->zl_dmu_pool, 0);
	}

	mutex_enter(&zilog->zl_lock);
	zilog->zl_writer = B_FALSE;

	ASSERT3U(commit_seq, >=, zilog->zl_commit_seq);
	zilog->zl_commit_seq = commit_seq;
}