/* * The ARC has requested that the filesystem drop entries from the dentry * and inode caches. This can occur when the ARC needs to free meta data * blocks but can't because they are all pinned by entries in these caches. */ int zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects) { zfs_sb_t *zsb = sb->s_fs_info; int error = 0; #if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK) struct shrinker *shrinker = &sb->s_shrink; struct shrink_control sc = { .nr_to_scan = nr_to_scan, .gfp_mask = GFP_KERNEL, }; #endif ZFS_ENTER(zsb); #if defined(HAVE_SPLIT_SHRINKER_CALLBACK) && \ defined(SHRINK_CONTROL_HAS_NID) && \ defined(SHRINKER_NUMA_AWARE) if (sb->s_shrink.flags & SHRINKER_NUMA_AWARE) { *objects = 0; for_each_online_node(sc.nid) *objects += (*shrinker->scan_objects)(shrinker, &sc); } else { *objects = (*shrinker->scan_objects)(shrinker, &sc); } #elif defined(HAVE_SPLIT_SHRINKER_CALLBACK) *objects = (*shrinker->scan_objects)(shrinker, &sc); #elif defined(HAVE_SHRINK) *objects = (*shrinker->shrink)(shrinker, &sc); #elif defined(HAVE_D_PRUNE_ALIASES) #define D_PRUNE_ALIASES_IS_DEFAULT *objects = zfs_sb_prune_aliases(zsb, nr_to_scan); #else #error "No available dentry and inode cache pruning mechanism." #endif #if defined(HAVE_D_PRUNE_ALIASES) && !defined(D_PRUNE_ALIASES_IS_DEFAULT) #undef D_PRUNE_ALIASES_IS_DEFAULT /* * Fall back to zfs_sb_prune_aliases if the kernel's per-superblock * shrinker couldn't free anything, possibly due to the inodes being * allocated in a different memcg. */ if (*objects == 0) *objects = zfs_sb_prune_aliases(zsb, nr_to_scan); #endif ZFS_EXIT(zsb); dprintf_ds(zsb->z_os->os_dsl_dataset, "pruning, nr_to_scan=%lu objects=%d error=%d\n", nr_to_scan, *objects, error); return (error); }
/* * The ARC has requested that the filesystem drop entries from the dentry * and inode caches. This can occur when the ARC needs to free meta data * blocks but can't because they are all pinned by entries in these caches. */ int zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects) { zfs_sb_t *zsb = sb->s_fs_info; int error = 0; #if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK) struct shrinker *shrinker = &sb->s_shrink; struct shrink_control sc = { .nr_to_scan = nr_to_scan, .gfp_mask = GFP_KERNEL, }; #endif ZFS_ENTER(zsb); #if defined(HAVE_SPLIT_SHRINKER_CALLBACK) && \ defined(SHRINK_CONTROL_HAS_NID) && \ defined(SHRINKER_NUMA_AWARE) if (sb->s_shrink.flags & SHRINKER_NUMA_AWARE) { *objects = 0; for_each_online_node(sc.nid) *objects += (*shrinker->scan_objects)(shrinker, &sc); } else { *objects = (*shrinker->scan_objects)(shrinker, &sc); } #elif defined(HAVE_SPLIT_SHRINKER_CALLBACK) *objects = (*shrinker->scan_objects)(shrinker, &sc); #elif defined(HAVE_SHRINK) *objects = (*shrinker->shrink)(shrinker, &sc); #elif defined(HAVE_D_PRUNE_ALIASES) *objects = zfs_sb_prune_aliases(zsb, nr_to_scan); #else #error "No available dentry and inode cache pruning mechanism." #endif ZFS_EXIT(zsb); dprintf_ds(zsb->z_os->os_dsl_dataset, "pruning, nr_to_scan=%lu objects=%d error=%d\n", nr_to_scan, *objects, error); return (error); }
/* * The ARC has requested that the filesystem drop entries from the dentry * and inode caches. This can occur when the ARC needs to free meta data * blocks but can't because they are all pinned by entries in these caches. */ int zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects) { zfs_sb_t *zsb = sb->s_fs_info; int error = 0; #if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK) struct shrinker *shrinker = &sb->s_shrink; struct shrink_control sc = { .nr_to_scan = nr_to_scan, .gfp_mask = GFP_KERNEL, }; #endif ZFS_ENTER(zsb); #if defined(HAVE_SPLIT_SHRINKER_CALLBACK) *objects = (*shrinker->scan_objects)(shrinker, &sc); #elif defined(HAVE_SHRINK) *objects = (*shrinker->shrink)(shrinker, &sc); #else /* * Linux kernels older than 3.1 do not support a per-filesystem * shrinker. Therefore, we must fall back to the only available * interface which is to discard all unused dentries and inodes. * This behavior clearly isn't ideal but it's required so the ARC * may free memory. The performance impact is mitigated by the * fact that the frequently accessed dentry and inode buffers will * still be in the ARC making them relatively cheap to recreate. */ *objects = 0; shrink_dcache_parent(sb->s_root); #endif ZFS_EXIT(zsb); dprintf_ds(zsb->z_os->os_dsl_dataset, "pruning, nr_to_scan=%lu objects=%d error=%d\n", nr_to_scan, *objects, error); return (error); }
/*
 * Sync an objset for the txg carried by 'tx'.  Called from dsl.
 *
 * The function builds the write zio for the objset's root block (as a child
 * of 'pio'), syncs the meta dnode and, when present, the user/group used
 * dnodes under that zio, syncs the free and dirty dnode lists for this txg,
 * issues the zios attached to the meta dnode's dirty records, syncs the ZIL
 * and finally issues the root block zio itself.
 *
 *	os	objset being synced
 *	pio	parent zio handed to arc_write() for the root block write
 *	tx	transaction; asserted to be in syncing context
 */
void
dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
{
	int txgoff;
	zbookmark_t zb;
	zio_prop_t zp;
	zio_t *zio;
	list_t *list;
	list_t *newlist = NULL;
	dbuf_dirty_record_t *dr;

	dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);

	ASSERT(dmu_tx_is_syncing(tx));
	/* XXX the write_done callback should really give us the tx... */
	os->os_synctx = tx;

	if (os->os_dsl_dataset == NULL) {
		/*
		 * This is the MOS.  If we have upgraded,
		 * spa_max_replication() could change, so reset
		 * os_copies here.
		 */
		os->os_copies = spa_max_replication(os->os_spa);
	}

	/*
	 * Create the root block IO
	 */
	SET_BOOKMARK(&zb, os->os_dsl_dataset ?
	    os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
	    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);

	arc_release(os->os_phys_buf, &os->os_phys_buf);

	dmu_write_policy(os, NULL, 0, 0, &zp);

	zio = arc_write(pio, os->os_spa, tx->tx_txg,
	    os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), &zp,
	    dmu_objset_write_ready, dmu_objset_write_done, os,
	    ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);

	/*
	 * Sync special dnodes - the parent IO for the sync is the root block
	 */
	DMU_META_DNODE(os)->dn_zio = zio;
	dnode_sync(DMU_META_DNODE(os), tx);

	os->os_phys->os_flags = os->os_flags;

	if (DMU_USERUSED_DNODE(os) &&
	    DMU_USERUSED_DNODE(os)->dn_type != DMU_OT_NONE) {
		DMU_USERUSED_DNODE(os)->dn_zio = zio;
		dnode_sync(DMU_USERUSED_DNODE(os), tx);
		DMU_GROUPUSED_DNODE(os)->dn_zio = zio;
		dnode_sync(DMU_GROUPUSED_DNODE(os), tx);
	}

	txgoff = tx->tx_txg & TXG_MASK;

	if (dmu_objset_userused_enabled(os)) {
		newlist = &os->os_synced_dnodes;
		/*
		 * We must create the list here because it uses the
		 * dn_dirty_link[] of this txg.
		 */
		list_create(newlist, sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[txgoff]));
	}

	dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
	dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);

	/*
	 * Drain the meta dnode's dirty records for this txg, issuing any zio
	 * attached to each one.  The NULL comparison is explicit so the
	 * assignment is not mistaken for an equality test (and does not trip
	 * -Wparentheses).
	 */
	list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
	while ((dr = list_head(list)) != NULL) {
		ASSERT0(dr->dr_dbuf->db_level);
		list_remove(list, dr);
		if (dr->dr_zio)
			zio_nowait(dr->dr_zio);
	}

	/*
	 * Free intent log blocks up to this tx.
	 */
	zil_sync(os->os_zil, tx);
	os->os_phys->os_zil_header = os->os_zil_header;
	zio_nowait(zio);
}
/*
 * Sync an objset for the txg carried by 'tx'.  Called from dsl.
 *
 * The function builds the write zio for the objset's root block (as a child
 * of 'pio'), syncs the meta dnode under that zio, syncs the free and dirty
 * dnode lists for this txg, issues the zios attached to the meta dnode's
 * dirty records, syncs the ZIL and finally issues the root block zio itself.
 *
 *	os	objset being synced
 *	pio	parent zio handed to arc_write() for the root block write
 *	tx	transaction; asserted to be in syncing context
 */
void
dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx)
{
	int txgoff;
	zbookmark_t zb;
	writeprops_t wp = { 0 };
	zio_t *zio;
	list_t *list;
	dbuf_dirty_record_t *dr;

	dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);

	ASSERT(dmu_tx_is_syncing(tx));
	/* XXX the write_done callback should really give us the tx... */
	os->os_synctx = tx;

	if (os->os_dsl_dataset == NULL) {
		/*
		 * This is the MOS.  If we have upgraded,
		 * spa_max_replication() could change, so reset
		 * os_copies here.
		 */
		os->os_copies = spa_max_replication(os->os_spa);
	}

	/*
	 * Create the root block IO
	 */
	zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0;
	zb.zb_object = 0;
	zb.zb_level = -1;
	zb.zb_blkid = 0;

	/* The existing root block predates this txg, so kill it first. */
	if (BP_IS_OLDER(os->os_rootbp, tx->tx_txg)) {
		(void) dsl_dataset_block_kill(os->os_dsl_dataset,
		    os->os_rootbp, pio, tx);
	}

	wp.wp_type = DMU_OT_OBJSET;
	wp.wp_copies = os->os_copies;
	wp.wp_level = (uint8_t)-1;
	wp.wp_oschecksum = os->os_checksum;
	wp.wp_oscompress = os->os_compress;

	arc_release(os->os_phys_buf, &os->os_phys_buf);
	zio = arc_write(pio, os->os_spa, &wp,
	    tx->tx_txg, os->os_rootbp, os->os_phys_buf, ready, NULL, os,
	    ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_METADATA,
	    &zb);

	/*
	 * Sync meta-dnode - the parent IO for the sync is the root block
	 */
	os->os_meta_dnode->dn_zio = zio;
	dnode_sync(os->os_meta_dnode, tx);

	txgoff = tx->tx_txg & TXG_MASK;

	dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], tx);
	dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], tx);

	/*
	 * Drain the meta dnode's dirty records for this txg, issuing any zio
	 * attached to each one.  The NULL comparison is explicit so the
	 * assignment is not mistaken for an equality test (and does not trip
	 * -Wparentheses).
	 */
	list = &os->os_meta_dnode->dn_dirty_records[txgoff];
	while ((dr = list_head(list)) != NULL) {
		ASSERT(dr->dr_dbuf->db_level == 0);
		list_remove(list, dr);
		if (dr->dr_zio)
			zio_nowait(dr->dr_zio);
	}

	/*
	 * Free intent log blocks up to this tx.
	 */
	zil_sync(os->os_zil, tx);
	os->os_phys->os_zil_header = os->os_zil_header;
	zio_nowait(zio);
}
/*
 * Entry point used by dsl to sync an objset for the txg carried by 'tx'.
 *
 * Sets up the root block write as a child of 'pio', syncs the meta dnode
 * and (when present) the user/group used dnodes beneath it, syncs the free
 * and dirty dnode lists of this txg, kicks off the zios hanging off the meta
 * dnode's dirty records, syncs the ZIL and then issues the root block write.
 *
 *	os	objset being synced
 *	pio	parent zio handed to arc_write() for the root block write
 *	tx	transaction; asserted to be in syncing context
 */
void
dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx)
{
	writeprops_t props = { 0 };
	zbookmark_t bookmark;
	dbuf_dirty_record_t *rec;
	list_t *meta_dirty;
	list_t *synced = NULL;
	zio_t *root_zio;
	int slot;

	dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);

	ASSERT(dmu_tx_is_syncing(tx));
	/* XXX the write_done callback should really give us the tx... */
	os->os_synctx = tx;

	/*
	 * No dataset means this is the MOS.  After an upgrade
	 * spa_max_replication() may report a different value, so refresh
	 * os_copies.
	 */
	if (os->os_dsl_dataset == NULL)
		os->os_copies = spa_max_replication(os->os_spa);

	/*
	 * Describe the root block: bookmark first, then write properties.
	 */
	if (os->os_dsl_dataset)
		bookmark.zb_objset = os->os_dsl_dataset->ds_object;
	else
		bookmark.zb_objset = 0;
	bookmark.zb_object = 0;
	bookmark.zb_level = -1;	/* for block ordering; it's level 0 on disk */
	bookmark.zb_blkid = 0;

	props.wp_type = DMU_OT_OBJSET;
	props.wp_level = 0;	/* on-disk BP level, unlike zb_level above */
	props.wp_copies = os->os_copies;
	props.wp_oschecksum = os->os_checksum;
	props.wp_oscompress = os->os_compress;

	/* A root block older than this txg is killed before the rewrite. */
	if (BP_IS_OLDER(os->os_rootbp, tx->tx_txg)) {
		(void) dsl_dataset_block_kill(os->os_dsl_dataset,
		    os->os_rootbp, pio, tx);
	}

	arc_release(os->os_phys_buf, &os->os_phys_buf);

	root_zio = arc_write(pio, os->os_spa, &wp_unused_guard_removed);
}