Example #1
/*
 * The ARC has requested that the filesystem drop entries from the dentry
 * and inode caches.  This can occur when the ARC needs to free metadata
 * blocks but can't because they are all pinned by entries in these caches.
 */
int
zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
{
	zfs_sb_t *zsb = sb->s_fs_info;
	int error = 0;
#if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK)
	struct shrinker *shrinker = &sb->s_shrink;
	struct shrink_control sc = {
		.nr_to_scan = nr_to_scan,
		.gfp_mask = GFP_KERNEL,
	};
#endif

	ZFS_ENTER(zsb);

#if defined(HAVE_SPLIT_SHRINKER_CALLBACK) && \
	defined(SHRINK_CONTROL_HAS_NID) && \
	defined(SHRINKER_NUMA_AWARE)
	if (sb->s_shrink.flags & SHRINKER_NUMA_AWARE) {
		*objects = 0;
		for_each_online_node(sc.nid)
			*objects += (*shrinker->scan_objects)(shrinker, &sc);
	} else {
		*objects = (*shrinker->scan_objects)(shrinker, &sc);
	}
#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
	*objects = (*shrinker->scan_objects)(shrinker, &sc);
#elif defined(HAVE_SHRINK)
	*objects = (*shrinker->shrink)(shrinker, &sc);
#elif defined(HAVE_D_PRUNE_ALIASES)
#define	D_PRUNE_ALIASES_IS_DEFAULT
	*objects = zfs_sb_prune_aliases(zsb, nr_to_scan);
#else
#error "No available dentry and inode cache pruning mechanism."
#endif

#if defined(HAVE_D_PRUNE_ALIASES) && !defined(D_PRUNE_ALIASES_IS_DEFAULT)
#undef	D_PRUNE_ALIASES_IS_DEFAULT
	/*
	 * Fall back to zfs_sb_prune_aliases if the kernel's per-superblock
	 * shrinker couldn't free anything, possibly due to the inodes being
	 * allocated in a different memcg.
	 */
	if (*objects == 0)
		*objects = zfs_sb_prune_aliases(zsb, nr_to_scan);
#endif

	ZFS_EXIT(zsb);

	dprintf_ds(zsb->z_os->os_dsl_dataset,
	    "pruning, nr_to_scan=%lu objects=%d error=%d\n",
	    nr_to_scan, *objects, error);

	return (error);
}
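
The NUMA-aware branch in Example #1 walks every online node and invokes the shrinker once per node, accumulating the per-node counts into *objects. Below is a minimal, self-contained sketch of that accumulation pattern; the struct definitions, toy_scan, and the fixed TOY_NR_NODES count are hypothetical stand-ins for the kernel's shrinker machinery and for_each_online_node().

#include <stdio.h>

/* Hypothetical stand-ins for the kernel types used above. */
struct shrink_control {
	unsigned long nr_to_scan;
	int nid;			/* NUMA node being scanned */
};

struct shrinker {
	long (*scan_objects)(struct shrinker *, struct shrink_control *);
};

static long
toy_scan(struct shrinker *shrinker, struct shrink_control *sc)
{
	(void) shrinker;
	/* Pretend each node frees half of what was requested. */
	return ((long)(sc->nr_to_scan / 2));
}

#define	TOY_NR_NODES	4	/* stand-in for for_each_online_node() */

int
main(void)
{
	struct shrinker shrinker = { .scan_objects = toy_scan };
	struct shrink_control sc = { .nr_to_scan = 128 };
	long objects = 0;

	/* Accumulate per-node results, as the NUMA-aware branch does. */
	for (sc.nid = 0; sc.nid < TOY_NR_NODES; sc.nid++)
		objects += (*shrinker.scan_objects)(&shrinker, &sc);

	printf("objects=%ld\n", objects);
	return (0);
}
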
Example #2
/*
 * The ARC has requested that the filesystem drop entries from the dentry
 * and inode caches.  This can occur when the ARC needs to free metadata
 * blocks but can't because they are all pinned by entries in these caches.
 */
int
zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
{
	zfs_sb_t *zsb = sb->s_fs_info;
	int error = 0;
#if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK)
	struct shrinker *shrinker = &sb->s_shrink;
	struct shrink_control sc = {
		.nr_to_scan = nr_to_scan,
		.gfp_mask = GFP_KERNEL,
	};
#endif

	ZFS_ENTER(zsb);

#if defined(HAVE_SPLIT_SHRINKER_CALLBACK) && \
	defined(SHRINK_CONTROL_HAS_NID) && \
	defined(SHRINKER_NUMA_AWARE)
	if (sb->s_shrink.flags & SHRINKER_NUMA_AWARE) {
		*objects = 0;
		for_each_online_node(sc.nid)
			*objects += (*shrinker->scan_objects)(shrinker, &sc);
	} else {
		*objects = (*shrinker->scan_objects)(shrinker, &sc);
	}
#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
	*objects = (*shrinker->scan_objects)(shrinker, &sc);
#elif defined(HAVE_SHRINK)
	*objects = (*shrinker->shrink)(shrinker, &sc);
#elif defined(HAVE_D_PRUNE_ALIASES)
	*objects = zfs_sb_prune_aliases(zsb, nr_to_scan);
#else
#error "No available dentry and inode cache pruning mechanism."
#endif
	ZFS_EXIT(zsb);

	dprintf_ds(zsb->z_os->os_dsl_dataset,
	    "pruning, nr_to_scan=%lu objects=%d error=%d\n",
	    nr_to_scan, *objects, error);

	return (error);
}
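
Both variants above select their implementation at compile time from autoconf-detected HAVE_* macros, with #error as the catch-all so a kernel offering none of the interfaces fails the build instead of silently pruning nothing. A stripped-down sketch of that gating pattern follows; the FEATURE_* names are hypothetical, standing in for the HAVE_* results a configure script would normally provide.

#include <stdio.h>

/* Normally defined by a build-system probe; hardcoded for this sketch. */
#define	FEATURE_NEW_API

static int
prune(int nr_to_scan)
{
#if defined(FEATURE_NEW_API)
	printf("new API: scanning %d objects\n", nr_to_scan);
	return (nr_to_scan);
#elif defined(FEATURE_OLD_API)
	printf("old API: scanning %d objects\n", nr_to_scan);
	return (nr_to_scan / 2);
#else
#error "No available pruning mechanism."
#endif
}

int
main(void)
{
	return (prune(64) > 0 ? 0 : 1);
}
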
Example #3
/*
 * The ARC has requested that the filesystem drop entries from the dentry
 * and inode caches.  This can occur when the ARC needs to free metadata
 * blocks but can't because they are all pinned by entries in these caches.
 */
int
zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
{
	zfs_sb_t *zsb = sb->s_fs_info;
	int error = 0;
#if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK)
	struct shrinker *shrinker = &sb->s_shrink;
	struct shrink_control sc = {
		.nr_to_scan = nr_to_scan,
		.gfp_mask = GFP_KERNEL,
	};
#endif

	ZFS_ENTER(zsb);

#if defined(HAVE_SPLIT_SHRINKER_CALLBACK)
	*objects = (*shrinker->scan_objects)(shrinker, &sc);
#elif defined(HAVE_SHRINK)
	*objects = (*shrinker->shrink)(shrinker, &sc);
#else
	/*
	 * Linux kernels older than 3.1 do not support a per-filesystem
	 * shrinker.  Therefore, we must fall back to the only available
	 * interface which is to discard all unused dentries and inodes.
	 * This behavior clearly isn't ideal but it's required so the ARC
	 * may free memory.  The performance impact is mitigated by the
	 * fact that the frequently accessed dentry and inode buffers will
	 * still be in the ARC making them relatively cheap to recreate.
	 */
	*objects = 0;
	shrink_dcache_parent(sb->s_root);
#endif
	ZFS_EXIT(zsb);

	dprintf_ds(zsb->z_os->os_dsl_dataset,
	    "pruning, nr_to_scan=%lu objects=%d error=%d\n",
	    nr_to_scan, *objects, error);

	return (error);
}
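
On kernels older than 3.1, Example #3 cannot honor nr_to_scan and instead discards every unused dentry under the superblock's root. The sketch below models that graceful degradation with a hypothetical toy_cache type: use the targeted scan callback when one is wired up, otherwise drop the whole cache, as the #else branch above does.

#include <stdio.h>
#include <stddef.h>

/* Hypothetical cache with an optional fine-grained scan callback. */
struct toy_cache {
	int entries;
	int (*scan)(struct toy_cache *, int);
};

static void
toy_drop_all(struct toy_cache *c)
{
	c->entries = 0;		/* coarse fallback: discard everything */
}

static int
toy_prune(struct toy_cache *c, int nr_to_scan)
{
	if (c->scan != NULL)
		return (c->scan(c, nr_to_scan));

	/* No targeted interface: mirror the pre-3.1 behavior. */
	toy_drop_all(c);
	return (0);
}

int
main(void)
{
	struct toy_cache c = { .entries = 1000, .scan = NULL };

	(void) toy_prune(&c, 100);
	printf("entries left: %d\n", c.entries);
	return (0);
}
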
Example #4
/* called from dsl */
void
dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
{
	int txgoff;
	zbookmark_t zb;
	zio_prop_t zp;
	zio_t *zio;
	list_t *list;
	list_t *newlist = NULL;
	dbuf_dirty_record_t *dr;

	dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);

	ASSERT(dmu_tx_is_syncing(tx));
	/* XXX the write_done callback should really give us the tx... */
	os->os_synctx = tx;

	if (os->os_dsl_dataset == NULL) {
		/*
		 * This is the MOS.  If we have upgraded,
		 * spa_max_replication() could change, so reset
		 * os_copies here.
		 */
		os->os_copies = spa_max_replication(os->os_spa);
	}

	/*
	 * Create the root block IO
	 */
	SET_BOOKMARK(&zb, os->os_dsl_dataset ?
	    os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
	    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
	arc_release(os->os_phys_buf, &os->os_phys_buf);

	dmu_write_policy(os, NULL, 0, 0, &zp);

	zio = arc_write(pio, os->os_spa, tx->tx_txg,
	    os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), &zp,
	    dmu_objset_write_ready, dmu_objset_write_done, os,
	    ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);

	/*
	 * Sync special dnodes - the parent IO for the sync is the root block
	 */
	DMU_META_DNODE(os)->dn_zio = zio;
	dnode_sync(DMU_META_DNODE(os), tx);

	os->os_phys->os_flags = os->os_flags;

	if (DMU_USERUSED_DNODE(os) &&
	    DMU_USERUSED_DNODE(os)->dn_type != DMU_OT_NONE) {
		DMU_USERUSED_DNODE(os)->dn_zio = zio;
		dnode_sync(DMU_USERUSED_DNODE(os), tx);
		DMU_GROUPUSED_DNODE(os)->dn_zio = zio;
		dnode_sync(DMU_GROUPUSED_DNODE(os), tx);
	}

	txgoff = tx->tx_txg & TXG_MASK;

	if (dmu_objset_userused_enabled(os)) {
		newlist = &os->os_synced_dnodes;
		/*
		 * We must create the list here because it uses the
		 * dn_dirty_link[] of this txg.
		 */
		list_create(newlist, sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[txgoff]));
	}

	dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
	dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);

	list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
	while ((dr = list_head(list)) != NULL) {
		ASSERT0(dr->dr_dbuf->db_level);
		list_remove(list, dr);
		if (dr->dr_zio)
			zio_nowait(dr->dr_zio);
	}
	/*
	 * Free intent log blocks up to this tx.
	 */
	zil_sync(os->os_zil, tx);
	os->os_phys->os_zil_header = os->os_zil_header;
	zio_nowait(zio);
}
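
The list_create() call in Example #4 embeds the list linkage inside each dnode_t at offsetof(dnode_t, dn_dirty_link[txgoff]), so a dnode can sit on a different list per open txg without any per-node allocation. Below is a self-contained sketch of that intrusive-list idea; toy_node, toy_list, and friends are hypothetical simplifications of the illumos-style list interface used by ZFS.

#include <stdio.h>
#include <stddef.h>

struct toy_link {
	struct toy_link *next;
};

/* One embedded link per txg slot, like dn_dirty_link[TXG_SIZE]. */
#define	TOY_SLOTS	4

struct toy_node {
	int id;
	struct toy_link link[TOY_SLOTS];
};

struct toy_list {
	struct toy_link *head;
	size_t offset;		/* where the embedded link lives */
};

static void
toy_list_create(struct toy_list *l, size_t offset)
{
	l->head = NULL;
	l->offset = offset;
}

static void
toy_list_insert(struct toy_list *l, struct toy_node *n)
{
	struct toy_link *ln = (struct toy_link *)((char *)n + l->offset);

	ln->next = l->head;
	l->head = ln;
}

/* Recover the containing node from its embedded link. */
static struct toy_node *
toy_list_node(struct toy_list *l, struct toy_link *ln)
{
	return ((struct toy_node *)((char *)ln - l->offset));
}

int
main(void)
{
	struct toy_list list;
	struct toy_node a = { .id = 1 }, b = { .id = 2 };
	struct toy_link *ln;
	int slot = 2;		/* like txgoff = txg & TXG_MASK */

	toy_list_create(&list, offsetof(struct toy_node, link[0]) +
	    (size_t)slot * sizeof (struct toy_link));
	toy_list_insert(&list, &a);
	toy_list_insert(&list, &b);

	for (ln = list.head; ln != NULL; ln = ln->next)
		printf("node id=%d\n", toy_list_node(&list, ln)->id);
	return (0);
}
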
Example #5
/* called from dsl */
void
dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx)
{
	int txgoff;
	zbookmark_t zb;
	writeprops_t wp = { 0 };
	zio_t *zio;
	list_t *list;
	dbuf_dirty_record_t *dr;

	dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);

	ASSERT(dmu_tx_is_syncing(tx));
	/* XXX the write_done callback should really give us the tx... */
	os->os_synctx = tx;

	if (os->os_dsl_dataset == NULL) {
		/*
		 * This is the MOS.  If we have upgraded,
		 * spa_max_replication() could change, so reset
		 * os_copies here.
		 */
		os->os_copies = spa_max_replication(os->os_spa);
	}

	/*
	 * Create the root block IO
	 */
	zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0;
	zb.zb_object = 0;
	zb.zb_level = -1;
	zb.zb_blkid = 0;
	if (BP_IS_OLDER(os->os_rootbp, tx->tx_txg)) {
		(void) dsl_dataset_block_kill(os->os_dsl_dataset,
		    os->os_rootbp, pio, tx);
	}
	wp.wp_type = DMU_OT_OBJSET;
	wp.wp_copies = os->os_copies;
	wp.wp_level = (uint8_t)-1;
	wp.wp_oschecksum = os->os_checksum;
	wp.wp_oscompress = os->os_compress;
	arc_release(os->os_phys_buf, &os->os_phys_buf);
	zio = arc_write(pio, os->os_spa, &wp,
	    tx->tx_txg, os->os_rootbp, os->os_phys_buf, ready, NULL, os,
	    ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_METADATA,
	    &zb);

	/*
	 * Sync meta-dnode - the parent IO for the sync is the root block
	 */
	os->os_meta_dnode->dn_zio = zio;
	dnode_sync(os->os_meta_dnode, tx);

	txgoff = tx->tx_txg & TXG_MASK;

	dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], tx);
	dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], tx);

	list = &os->os_meta_dnode->dn_dirty_records[txgoff];
	while ((dr = list_head(list)) != NULL) {
		ASSERT(dr->dr_dbuf->db_level == 0);
		list_remove(list, dr);
		if (dr->dr_zio)
			zio_nowait(dr->dr_zio);
	}
	/*
	 * Free intent log blocks up to this tx.
	 */
	zil_sync(os->os_zil, tx);
	os->os_phys->os_zil_header = os->os_zil_header;
	zio_nowait(zio);
}
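
In every variant, txgoff = tx->tx_txg & TXG_MASK selects one of a small fixed number of per-txg slots, so the transaction groups that can be in flight at once keep disjoint dirty-dnode lists. A short sketch of that power-of-two ring indexing, assuming the usual TXG_SIZE of 4:

#include <stdio.h>

/* In ZFS, TXG_SIZE is 4 and TXG_MASK is TXG_SIZE - 1. */
#define	TXG_SIZE	4
#define	TXG_MASK	(TXG_SIZE - 1)

int
main(void)
{
	unsigned long long txg;

	/* Consecutive txgs cycle through the same TXG_SIZE slots. */
	for (txg = 100; txg < 106; txg++)
		printf("txg=%llu slot=%llu\n", txg, txg & TXG_MASK);
	return (0);
}
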
Example #6
/* called from dsl */
void
dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx)
{
	int txgoff;
	zbookmark_t zb;
	writeprops_t wp = { 0 };
	zio_t *zio;
	list_t *list;
	list_t *newlist = NULL;
	dbuf_dirty_record_t *dr;

	dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);

	ASSERT(dmu_tx_is_syncing(tx));
	/* XXX the write_done callback should really give us the tx... */
	os->os_synctx = tx;

	if (os->os_dsl_dataset == NULL) {
		/*
		 * This is the MOS.  If we have upgraded,
		 * spa_max_replication() could change, so reset
		 * os_copies here.
		 */
		os->os_copies = spa_max_replication(os->os_spa);
	}

	/*
	 * Create the root block IO
	 */
	zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0;
	zb.zb_object = 0;
	zb.zb_level = -1;	/* for block ordering; it's level 0 on disk */
	zb.zb_blkid = 0;

	wp.wp_type = DMU_OT_OBJSET;
	wp.wp_level = 0;	/* on-disk BP level; see above */
	wp.wp_copies = os->os_copies;
	wp.wp_oschecksum = os->os_checksum;
	wp.wp_oscompress = os->os_compress;

	if (BP_IS_OLDER(os->os_rootbp, tx->tx_txg)) {
		(void) dsl_dataset_block_kill(os->os_dsl_dataset,
		    os->os_rootbp, pio, tx);
	}

	arc_release(os->os_phys_buf, &os->os_phys_buf);

	zio = arc_write(pio, os->os_spa, &wp, DMU_OS_IS_L2CACHEABLE(os),
	    tx->tx_txg, os->os_rootbp, os->os_phys_buf, ready, NULL, os,
	    ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);

	/*
	 * Sync special dnodes - the parent IO for the sync is the root block
	 */
	os->os_meta_dnode->dn_zio = zio;
	dnode_sync(os->os_meta_dnode, tx);

	os->os_phys->os_flags = os->os_flags;

	if (os->os_userused_dnode &&
	    os->os_userused_dnode->dn_type != DMU_OT_NONE) {
		os->os_userused_dnode->dn_zio = zio;
		dnode_sync(os->os_userused_dnode, tx);
		os->os_groupused_dnode->dn_zio = zio;
		dnode_sync(os->os_groupused_dnode, tx);
	}

	txgoff = tx->tx_txg & TXG_MASK;

	if (dmu_objset_userused_enabled(os)) {
		newlist = &os->os_synced_dnodes;
		/*
		 * We must create the list here because it uses the
		 * dn_dirty_link[] of this txg.
		 */
		list_create(newlist, sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[txgoff]));
	}

	dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
	dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);

	list = &os->os_meta_dnode->dn_dirty_records[txgoff];
	while ((dr = list_head(list))) {
		ASSERT(dr->dr_dbuf->db_level == 0);
		list_remove(list, dr);
		if (dr->dr_zio)
			zio_nowait(dr->dr_zio);
	}
	/*
	 * Free intent log blocks up to this tx.
	 */
	zil_sync(os->os_zil, tx);
	os->os_phys->os_zil_header = os->os_zil_header;
	zio_nowait(zio);
}
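
Examples #5 and #6 fill the zbookmark_t field by field, while Example #4 uses the later SET_BOOKMARK() macro, which consolidates the same four assignments behind one call site. Below is a sketch of that consolidation; toy_bookmark and TOY_SET_BOOKMARK are hypothetical stand-ins for ZFS's zbookmark_t and SET_BOOKMARK.

#include <stdio.h>
#include <stdint.h>

struct toy_bookmark {
	uint64_t zb_objset;
	uint64_t zb_object;
	int64_t zb_level;
	uint64_t zb_blkid;
};

/* One macro in place of four open-coded assignments. */
#define	TOY_SET_BOOKMARK(zb, objset, object, level, blkid)	\
{								\
	(zb)->zb_objset = (objset);				\
	(zb)->zb_object = (object);				\
	(zb)->zb_level = (level);				\
	(zb)->zb_blkid = (blkid);				\
}

int
main(void)
{
	struct toy_bookmark zb;

	TOY_SET_BOOKMARK(&zb, 21, 0, -1, 0);
	printf("objset=%llu level=%lld\n",
	    (unsigned long long)zb.zb_objset, (long long)zb.zb_level);
	return (0);
}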