/*
 * Walk the list of zstreams in the given zfetch, find an old one (by time),
 * and reclaim it for use by the caller.
 *
 * Returns a zeroed-out zstream_t ready for reuse (still linked memory,
 * unlinked from zf_stream), or NULL if the lock could not be taken without
 * blocking or if no stream was idle long enough to reap.
 */
static zstream_t *
dmu_zfetch_stream_reclaim(zfetch_t *zf)
{
	zstream_t *zs;

	/* Best-effort: never block the I/O path waiting for the lock. */
	if (!rw_tryenter(&zf->zf_rwlock, RW_WRITER))
		return (NULL);

	for (zs = list_head(&zf->zf_stream); zs;
	    zs = list_next(&zf->zf_stream, zs)) {
		/*
		 * Take the first stream that has been idle for more than
		 * zfetch_min_sec_reap seconds (lbolt/hz converts ticks to
		 * seconds).
		 */
		if (((lbolt - zs->zst_last) / hz) > zfetch_min_sec_reap)
			break;
	}

	if (zs) {
		/*
		 * Recycle the stream in place: unlink it, tear down its
		 * lock, and hand the zeroed memory back to the caller
		 * (the caller re-initializes it; no kmem_free here).
		 */
		dmu_zfetch_stream_remove(zf, zs);
		mutex_destroy(&zs->zst_lock);
		bzero(zs, sizeof (zstream_t));
	} else {
		/* Nothing reapable; account the failed allocation attempt. */
		zf->zf_alloc_fail++;
	}
	rw_exit(&zf->zf_rwlock);

	return (zs);
}
/*
 * If there aren't too many streams already, create a new stream.
 * The "blkid" argument is the next block that we expect this stream to access.
 * While we're here, clean up old streams (which haven't been
 * accessed for at least zfetch_min_sec_reap seconds).
 */
static void
dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
{
	zstream_t *walk;
	zstream_t *nxt;
	int nstreams = 0;
	uint32_t limit;

	ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));

	/*
	 * Reap streams that have been idle for more than
	 * zfetch_min_sec_reap seconds; count the survivors.
	 */
	for (walk = list_head(&zf->zf_stream); walk != NULL; walk = nxt) {
		nxt = list_next(&zf->zf_stream, walk);
		if (((gethrtime() - walk->zs_atime) / NANOSEC) >
		    zfetch_min_sec_reap)
			dmu_zfetch_stream_remove(zf, walk);
		else
			nstreams++;
	}

	/*
	 * The maximum number of streams is normally zfetch_max_streams,
	 * but for small files we lower it such that it's at least possible
	 * for all the streams to be non-overlapping.
	 *
	 * If we are already at the maximum number of streams for this file,
	 * even after removing old streams, then don't create this stream.
	 */
	limit = MAX(1, MIN(zfetch_max_streams,
	    zf->zf_dnode->dn_maxblkid * zf->zf_dnode->dn_datablksz /
	    zfetch_max_distance));
	if (nstreams >= limit) {
		ZFETCHSTAT_BUMP(zfetchstat_max_streams);
		return;
	}

	/* Room available: allocate and initialize the new stream. */
	walk = kmem_zalloc(sizeof (*walk), KM_SLEEP);
	walk->zs_blkid = blkid;
	walk->zs_pf_blkid = blkid;
	walk->zs_ipf_blkid = blkid;
	walk->zs_atime = gethrtime();
	mutex_init(&walk->zs_lock, NULL, MUTEX_DEFAULT, NULL);

	list_insert_head(&zf->zf_stream, walk);
}
/*
 * Clean-up state associated with a zfetch structure (e.g. destroy the
 * streams).  This doesn't free the zfetch_t itself, that's left to the
 * caller.
 */
void
dmu_zfetch_fini(zfetch_t *zf)
{
	zstream_t *stream;

	ASSERT(!RW_LOCK_HELD(&zf->zf_rwlock));

	/* Tear down every remaining stream under the writer lock. */
	rw_enter(&zf->zf_rwlock, RW_WRITER);
	for (;;) {
		stream = list_head(&zf->zf_stream);
		if (stream == NULL)
			break;
		dmu_zfetch_stream_remove(zf, stream);
	}
	rw_exit(&zf->zf_rwlock);

	/* No streams left; destroy the list and lock themselves. */
	list_destroy(&zf->zf_stream);
	rw_destroy(&zf->zf_rwlock);

	zf->zf_dnode = NULL;
}
/* * Given a zfetch structure and a zstream structure, determine whether the * blocks to be read are part of a co-linear pair of existing prefetch * streams. If a set is found, coalesce the streams, removing one, and * configure the prefetch so it looks for a strided access pattern. * * In other words: if we find two sequential access streams that are * the same length and distance N appart, and this read is N from the * last stream, then we are probably in a strided access pattern. So * combine the two sequential streams into a single strided stream. * * If no co-linear streams are found, return NULL. */ static int dmu_zfetch_colinear(zfetch_t *zf, zstream_t *zh) { zstream_t *z_walk; zstream_t *z_comp; if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER)) return (0); if (zh == NULL) { rw_exit(&zf->zf_rwlock); return (0); } for (z_walk = list_head(&zf->zf_stream); z_walk; z_walk = list_next(&zf->zf_stream, z_walk)) { for (z_comp = list_next(&zf->zf_stream, z_walk); z_comp; z_comp = list_next(&zf->zf_stream, z_comp)) { int64_t diff; if (z_walk->zst_len != z_walk->zst_stride || z_comp->zst_len != z_comp->zst_stride) { continue; } diff = z_comp->zst_offset - z_walk->zst_offset; if (z_comp->zst_offset + diff == zh->zst_offset) { z_walk->zst_offset = zh->zst_offset; z_walk->zst_direction = diff < 0 ? -1 : 1; z_walk->zst_stride = diff * z_walk->zst_direction; z_walk->zst_ph_offset = zh->zst_offset + z_walk->zst_stride; dmu_zfetch_stream_remove(zf, z_comp); mutex_destroy(&z_comp->zst_lock); kmem_free(z_comp, sizeof (zstream_t)); dmu_zfetch_dofetch(zf, z_walk); rw_exit(&zf->zf_rwlock); return (1); } diff = z_walk->zst_offset - z_comp->zst_offset; if (z_walk->zst_offset + diff == zh->zst_offset) { z_walk->zst_offset = zh->zst_offset; z_walk->zst_direction = diff < 0 ? 
-1 : 1; z_walk->zst_stride = diff * z_walk->zst_direction; z_walk->zst_ph_offset = zh->zst_offset + z_walk->zst_stride; dmu_zfetch_stream_remove(zf, z_comp); mutex_destroy(&z_comp->zst_lock); kmem_free(z_comp, sizeof (zstream_t)); dmu_zfetch_dofetch(zf, z_walk); rw_exit(&zf->zf_rwlock); return (1); } } } rw_exit(&zf->zf_rwlock); return (0); }
/*
 * given a zfetch and a zsearch structure, see if there is an associated zstream
 * for this block read.  If so, it starts a prefetch for the stream it
 * located and returns true, otherwise it returns false
 *
 * "prefetched" indicates whether the caller believes the blocks were
 * already prefetched; it suppresses the stream-reset heuristic below.
 */
static int
dmu_zfetch_find(zfetch_t *zf, zstream_t *zh, int prefetched)
{
	zstream_t *zs;
	int64_t diff;
	/*
	 * reset != 0 means we will tear the matched stream down rather
	 * than extend it (access pattern looks like a fresh start).
	 */
	int reset = !prefetched;
	int rc = 0;

	if (zh == NULL)
		return (0);

	/*
	 * XXX: This locking strategy is a bit coarse; however, it's impact has
	 * yet to be tested.  If this turns out to be an issue, it can be
	 * modified in a number of different ways.
	 */

	rw_enter(&zf->zf_rwlock, RW_READER);
top:

	for (zs = list_head(&zf->zf_stream); zs;
	    zs = list_next(&zf->zf_stream, zs)) {

		/*
		 * XXX - should this be an assert?
		 */
		if (zs->zst_len == 0) {
			/* bogus stream */
			continue;
		}

		/*
		 * We hit this case when we are in a strided prefetch stream:
		 * we will read "len" blocks before "striding".
		 */
		if (zh->zst_offset >= zs->zst_offset &&
		    zh->zst_offset < zs->zst_offset + zs->zst_len) {
			/* already fetched */
			rc = 1;
			goto out;
		}

		/*
		 * This is the forward sequential read case: we increment
		 * len by one each time we hit here, so we will enter this
		 * case on every read.
		 */
		if (zh->zst_offset == zs->zst_offset + zs->zst_len) {

			reset = !prefetched && zs->zst_len > 1;

			mutex_enter(&zs->zst_lock);

			/*
			 * Re-validate under the stream's mutex: another
			 * thread may have advanced the stream while we
			 * waited.  If so, rescan the whole list.
			 */
			if (zh->zst_offset != zs->zst_offset + zs->zst_len) {
				mutex_exit(&zs->zst_lock);
				goto top;
			}
			zs->zst_len += zh->zst_len;
			/* Cap the stream length at zfetch_block_cap blocks. */
			diff = zs->zst_len - zfetch_block_cap;
			if (diff > 0) {
				zs->zst_offset += diff;
				zs->zst_len = zs->zst_len > diff ?
				    zs->zst_len - diff : 0;
			}
			zs->zst_direction = ZFETCH_FORWARD;

			break;

		/*
		 * Same as above, but reading backwards through the file.
		 */
		} else if (zh->zst_offset == zs->zst_offset - zh->zst_len) {
			/* backwards sequential access */

			reset = !prefetched && zs->zst_len > 1;

			mutex_enter(&zs->zst_lock);

			/* Re-validate under the mutex (see forward case). */
			if (zh->zst_offset != zs->zst_offset - zh->zst_len) {
				mutex_exit(&zs->zst_lock);
				goto top;
			}

			/* Clamp at zero rather than underflowing. */
			zs->zst_offset = zs->zst_offset > zh->zst_len ?
			    zs->zst_offset - zh->zst_len : 0;
			zs->zst_ph_offset = zs->zst_ph_offset > zh->zst_len ?
			    zs->zst_ph_offset - zh->zst_len : 0;
			zs->zst_len += zh->zst_len;

			diff = zs->zst_len - zfetch_block_cap;
			if (diff > 0) {
				zs->zst_ph_offset = zs->zst_ph_offset > diff ?
				    zs->zst_ph_offset - diff : 0;
				zs->zst_len = zs->zst_len > diff ?
				    zs->zst_len - diff : zs->zst_len;
			}
			zs->zst_direction = ZFETCH_BACKWARD;

			break;

		} else if ((zh->zst_offset - zs->zst_offset - zs->zst_stride <
		    zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
			/* strided forward access */

			mutex_enter(&zs->zst_lock);

			/* Re-validate under the mutex (see forward case). */
			if ((zh->zst_offset - zs->zst_offset - zs->zst_stride >=
			    zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
				mutex_exit(&zs->zst_lock);
				goto top;
			}

			zs->zst_offset += zs->zst_stride;
			zs->zst_direction = ZFETCH_FORWARD;

			break;

		} else if ((zh->zst_offset - zs->zst_offset + zs->zst_stride <
		    zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
			/* strided reverse access */

			mutex_enter(&zs->zst_lock);

			/* Re-validate under the mutex (see forward case). */
			if ((zh->zst_offset - zs->zst_offset + zs->zst_stride >=
			    zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
				mutex_exit(&zs->zst_lock);
				goto top;
			}

			/* Clamp at zero rather than underflowing. */
			zs->zst_offset = zs->zst_offset > zs->zst_stride ?
			    zs->zst_offset - zs->zst_stride : 0;
			zs->zst_ph_offset = (zs->zst_ph_offset >
			    (2 * zs->zst_stride)) ?
			    (zs->zst_ph_offset - (2 * zs->zst_stride)) : 0;
			zs->zst_direction = ZFETCH_BACKWARD;

			break;
		}
	}

	/* On loop exit, zs != NULL means we matched and hold zs->zst_lock. */
	if (zs) {
		if (reset) {
			zstream_t *remove = zs;

			rc = 0;
			mutex_exit(&zs->zst_lock);
			/*
			 * Upgrade by drop-and-reacquire: the stream may be
			 * freed by another thread in the window, so we must
			 * not touch "remove" until we find it again below.
			 */
			rw_exit(&zf->zf_rwlock);
			rw_enter(&zf->zf_rwlock, RW_WRITER);
			/*
			 * Relocate the stream, in case someone removes
			 * it while we were acquiring the WRITER lock.
			 */
			for (zs = list_head(&zf->zf_stream); zs;
			    zs = list_next(&zf->zf_stream, zs)) {
				if (zs == remove) {
					dmu_zfetch_stream_remove(zf, zs);
					mutex_destroy(&zs->zst_lock);
					kmem_free(zs, sizeof (zstream_t));
					break;
				}
			}
		} else {
			rc = 1;
			/* Stream extended: issue the prefetch. */
			dmu_zfetch_dofetch(zf, zs);
			mutex_exit(&zs->zst_lock);
		}
	}
out:
	rw_exit(&zf->zf_rwlock);
	return (rc);
}