Example #1
0
/*
 * Acquire synchronization TID
 */
static
int
hammer_ioc_synctid(hammer_transaction_t trans, hammer_inode_t ip,
		   struct hammer_ioc_synctid *std)
{
	hammer_mount_t hmp = ip->hmp;
	int error = 0;

	switch(std->op) {
	case HAMMER_SYNCTID_NONE:
		std->tid = hmp->flusher.tid;	/* inaccurate */
		break;
	case HAMMER_SYNCTID_ASYNC:
		hammer_queue_inodes_flusher(hmp, MNT_NOWAIT);
		hammer_flusher_async(hmp, NULL);
		std->tid = hmp->flusher.tid;	/* inaccurate */
		break;
	case HAMMER_SYNCTID_SYNC1:
		hammer_queue_inodes_flusher(hmp, MNT_WAIT);
		hammer_flusher_sync(hmp);
		std->tid = hmp->flusher.tid;
		break;
	case HAMMER_SYNCTID_SYNC2:
		hammer_queue_inodes_flusher(hmp, MNT_WAIT);
		hammer_flusher_sync(hmp);
		std->tid = hmp->flusher.tid;
		hammer_flusher_sync(hmp);
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	return(error);
}
/*
 * Clean up the internal mount structure and disassociate it from the mount.
 * This may issue I/O.
 *
 * Called with fs_token held.
 */
static void
hammer_free_hmp(struct mount *mp)
{
	hammer_mount_t hmp = (void *)mp->mnt_data;
	hammer_flush_group_t flg;
	int count;
	int dummy;

	/*
	 * Flush anything dirty.  This won't even run if the
	 * filesystem errored-out.
	 */
	count = 0;
	while (hammer_flusher_haswork(hmp)) {
		hammer_flusher_sync(hmp);
		++count;
		if (count >= 5) {
			if (count == 5)
				kprintf("HAMMER: umount flushing.");
			else
				kprintf(".");
			tsleep(&dummy, 0, "hmrufl", hz);
		}
		if (count == 30) {
			kprintf("giving up\n");
			break;
		}
	}
	if (count >= 5 && count < 30)
		kprintf("\n");

	/*
	 * If the mount had a critical error we have to destroy any
	 * remaining inodes before we can finish cleaning up the flusher.
	 */
	if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR) {
		RB_SCAN(hammer_ino_rb_tree, &hmp->rb_inos_root, NULL,
			hammer_destroy_inode_callback, NULL);
	}

	/*
	 * There shouldn't be any inodes left now and any left over
	 * flush groups should now be empty.
	 */
	KKASSERT(RB_EMPTY(&hmp->rb_inos_root));
	while ((flg = TAILQ_FIRST(&hmp->flush_group_list)) != NULL) {
		TAILQ_REMOVE(&hmp->flush_group_list, flg, flush_entry);
		KKASSERT(RB_EMPTY(&flg->flush_tree));
		if (flg->refs) {
			kprintf("HAMMER: Warning, flush_group %p was "
				"not empty on umount!\n", flg);
		}
		kfree(flg, hmp->m_misc);
	}

	/*
	 * We can finally destroy the flusher
	 */
	hammer_flusher_destroy(hmp);

	/*
	 * We may have held recovered buffers due to a read-only mount.
	 * These must be discarded.
	 */
	if (hmp->ronly)
		hammer_recover_flush_buffers(hmp, NULL, -1);

	/*
	 * Unload buffers and then volumes
	 */
        RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
		hammer_unload_buffer, NULL);
	RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
		hammer_unload_volume, NULL);

	mp->mnt_data = NULL;
	mp->mnt_flag &= ~MNT_LOCAL;
	hmp->mp = NULL;
	hammer_destroy_objid_cache(hmp);
	hammer_destroy_dedup_cache(hmp);
	if (hmp->dedup_free_cache != NULL) {
		kfree(hmp->dedup_free_cache, hmp->m_misc);
		hmp->dedup_free_cache = NULL;
	}
	kmalloc_destroy(&hmp->m_misc);
	kmalloc_destroy(&hmp->m_inodes);
	lwkt_reltoken(&hmp->fs_token);
	kfree(hmp, M_HAMMER);
}
static int
hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data,
		 struct ucred *cred)
{
	struct hammer_mount_info info;
	hammer_mount_t hmp;
	hammer_volume_t rootvol;
	struct vnode *rootvp;
	struct vnode *devvp = NULL;
	const char *upath;	/* volume name in userspace */
	char *path;		/* volume name in system space */
	int error;
	int i;
	int master_id;
	char *next_volume_ptr = NULL;

	/*
	 * Accept hammer_mount_info.  mntpt is NULL for root mounts at boot.
	 */
	if (mntpt == NULL) {
		bzero(&info, sizeof(info));
		info.asof = 0;
		info.hflags = 0;
		info.nvolumes = 1;

		next_volume_ptr = mp->mnt_stat.f_mntfromname;

		/* Count number of volumes separated by ':' */
		for (char *p = next_volume_ptr; *p != '\0'; ++p) {
			if (*p == ':') {
				++info.nvolumes;
			}
		}

		mp->mnt_flag &= ~MNT_RDONLY; /* mount R/W */
	} else {
		if ((error = copyin(data, &info, sizeof(info))) != 0)
			return (error);
	}

	/*
	 * updating or new mount
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		hmp = (void *)mp->mnt_data;
		KKASSERT(hmp != NULL);
	} else {
		if (info.nvolumes <= 0 || info.nvolumes > HAMMER_MAX_VOLUMES)
			return (EINVAL);
		hmp = NULL;
	}

	/*
	 * master-id validation.  The master id may not be changed by a
	 * mount update.
	 */
	if (info.hflags & HMNT_MASTERID) {
		if (hmp && hmp->master_id != info.master_id) {
			kprintf("hammer: cannot change master id "
				"with mount update\n");
			return(EINVAL);
		}
		master_id = info.master_id;
		if (master_id < -1 || master_id >= HAMMER_MAX_MASTERS)
			return (EINVAL);
	} else {
		if (hmp)
			master_id = hmp->master_id;
		else
			master_id = 0;
	}

	/*
	 * Internal mount data structure
	 */
	if (hmp == NULL) {
		hmp = kmalloc(sizeof(*hmp), M_HAMMER, M_WAITOK | M_ZERO);
		mp->mnt_data = (qaddr_t)hmp;
		hmp->mp = mp;
		/*TAILQ_INIT(&hmp->recycle_list);*/

		/*
		 * Make sure kmalloc type limits are set appropriately.
		 *
		 * Our inode kmalloc group is sized based on maxvnodes
		 * (controlled by the system, not us).
		 */
		kmalloc_create(&hmp->m_misc, "HAMMER-others");
		kmalloc_create(&hmp->m_inodes, "HAMMER-inodes");

		kmalloc_raise_limit(hmp->m_inodes, 0);	/* unlimited */

		hmp->root_btree_beg.localization = 0x00000000U;
		hmp->root_btree_beg.obj_id = -0x8000000000000000LL;
		hmp->root_btree_beg.key = -0x8000000000000000LL;
		hmp->root_btree_beg.create_tid = 1;
		hmp->root_btree_beg.delete_tid = 1;
		hmp->root_btree_beg.rec_type = 0;
		hmp->root_btree_beg.obj_type = 0;

		hmp->root_btree_end.localization = 0xFFFFFFFFU;
		hmp->root_btree_end.obj_id = 0x7FFFFFFFFFFFFFFFLL;
		hmp->root_btree_end.key = 0x7FFFFFFFFFFFFFFFLL;
		hmp->root_btree_end.create_tid = 0xFFFFFFFFFFFFFFFFULL;
		hmp->root_btree_end.delete_tid = 0;   /* special case */
		hmp->root_btree_end.rec_type = 0xFFFFU;
		hmp->root_btree_end.obj_type = 0;

		hmp->krate.freq = 1;	/* maximum reporting rate (hz) */
		hmp->krate.count = -16;	/* initial burst */

		hmp->sync_lock.refs = 1;
		hmp->free_lock.refs = 1;
		hmp->undo_lock.refs = 1;
		hmp->blkmap_lock.refs = 1;
		hmp->snapshot_lock.refs = 1;
		hmp->volume_lock.refs = 1;

		TAILQ_INIT(&hmp->delay_list);
		TAILQ_INIT(&hmp->flush_group_list);
		TAILQ_INIT(&hmp->objid_cache_list);
		TAILQ_INIT(&hmp->undo_lru_list);
		TAILQ_INIT(&hmp->reclaim_list);

		RB_INIT(&hmp->rb_dedup_crc_root);
		RB_INIT(&hmp->rb_dedup_off_root);	
		TAILQ_INIT(&hmp->dedup_lru_list);
	}
	hmp->hflags &= ~HMNT_USERFLAGS;
	hmp->hflags |= info.hflags & HMNT_USERFLAGS;

	hmp->master_id = master_id;

	if (info.asof) {
		mp->mnt_flag |= MNT_RDONLY;
		hmp->asof = info.asof;
	} else {
		hmp->asof = HAMMER_MAX_TID;
	}

	hmp->volume_to_remove = -1;

	/*
	 * Re-open read-write if originally read-only, or vise-versa.
	 *
	 * When going from read-only to read-write execute the stage2
	 * recovery if it has not already been run.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		lwkt_gettoken(&hmp->fs_token);
		error = 0;
		if (hmp->ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
			kprintf("HAMMER read-only -> read-write\n");
			hmp->ronly = 0;
			RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
				hammer_adjust_volume_mode, NULL);
			rootvol = hammer_get_root_volume(hmp, &error);
			if (rootvol) {
				hammer_recover_flush_buffers(hmp, rootvol, 1);
				error = hammer_recover_stage2(hmp, rootvol);
				bcopy(rootvol->ondisk->vol0_blockmap,
				      hmp->blockmap,
				      sizeof(hmp->blockmap));
				hammer_rel_volume(rootvol, 0);
			}
			RB_SCAN(hammer_ino_rb_tree, &hmp->rb_inos_root, NULL,
				hammer_reload_inode, NULL);
			/* kernel clears MNT_RDONLY */
		} else if (hmp->ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			kprintf("HAMMER read-write -> read-only\n");
			hmp->ronly = 1;	/* messy */
			RB_SCAN(hammer_ino_rb_tree, &hmp->rb_inos_root, NULL,
				hammer_reload_inode, NULL);
			hmp->ronly = 0;
			hammer_flusher_sync(hmp);
			hammer_flusher_sync(hmp);
			hammer_flusher_sync(hmp);
			hmp->ronly = 1;
			RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
				hammer_adjust_volume_mode, NULL);
		}
		lwkt_reltoken(&hmp->fs_token);
		return(error);
	}

	RB_INIT(&hmp->rb_vols_root);
	RB_INIT(&hmp->rb_inos_root);
	RB_INIT(&hmp->rb_redo_root);
	RB_INIT(&hmp->rb_nods_root);
	RB_INIT(&hmp->rb_undo_root);
	RB_INIT(&hmp->rb_resv_root);
	RB_INIT(&hmp->rb_bufs_root);
	RB_INIT(&hmp->rb_pfsm_root);

	hmp->ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);

	RB_INIT(&hmp->volu_root);
	RB_INIT(&hmp->undo_root);
	RB_INIT(&hmp->data_root);
	RB_INIT(&hmp->meta_root);
	RB_INIT(&hmp->lose_root);
	TAILQ_INIT(&hmp->iorun_list);

	lwkt_token_init(&hmp->fs_token, "hammerfs");
	lwkt_token_init(&hmp->io_token, "hammerio");

	lwkt_gettoken(&hmp->fs_token);

	/*
	 * Load volumes
	 */
	path = objcache_get(namei_oc, M_WAITOK);
	hmp->nvolumes = -1;
	for (i = 0; i < info.nvolumes; ++i) {
		if (mntpt == NULL) {
			/*
			 * Root mount.
			 */
			KKASSERT(next_volume_ptr != NULL);
			strcpy(path, "");
			if (*next_volume_ptr != '/') {
				/* relative path */
				strcpy(path, "/dev/");
			}
			int k;
			for (k = strlen(path); k < MAXPATHLEN-1; ++k) {
				if (*next_volume_ptr == '\0') {
					break;
				} else if (*next_volume_ptr == ':') {
					++next_volume_ptr;
					break;
				} else {
					path[k] = *next_volume_ptr;
					++next_volume_ptr;
				}
			}
			path[k] = '\0';

			error = 0;
			cdev_t dev = kgetdiskbyname(path);
			error = bdevvp(dev, &devvp);
			if (error) {
				kprintf("hammer_mountroot: can't find devvp\n");
			}
		} else {
			error = copyin(&info.volumes[i], &upath,
				       sizeof(char *));
			if (error == 0)
				error = copyinstr(upath, path,
						  MAXPATHLEN, NULL);
		}
		if (error == 0)
			error = hammer_install_volume(hmp, path, devvp);
		if (error)
			break;
	}
	objcache_put(namei_oc, path);

	/*
	 * Make sure we found a root volume
	 */
	if (error == 0 && hmp->rootvol == NULL) {
		kprintf("hammer_mount: No root volume found!\n");
		error = EINVAL;
	}

	/*
	 * Check that all required volumes are available
	 */
	if (error == 0 && hammer_mountcheck_volumes(hmp)) {
		kprintf("hammer_mount: Missing volumes, cannot mount!\n");
		error = EINVAL;
	}

	if (error) {
		/* called with fs_token held */
		hammer_free_hmp(mp);
		return (error);
	}

	/*
	 * No errors, setup enough of the mount point so we can lookup the
	 * root vnode.
	 */
	mp->mnt_iosize_max = MAXPHYS;
	mp->mnt_kern_flag |= MNTK_FSMID;
	mp->mnt_kern_flag |= MNTK_THR_SYNC;	/* new vsyncscan semantics */

	/*
	 * MPSAFE code.  Note that VOPs and VFSops which are not MPSAFE
	 * will acquire a per-mount token prior to entry and release it
	 * on return, so even if we do not specify it we no longer get
	 * the BGL regardlless of how we are flagged.
	 */
	mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;
	/*MNTK_RD_MPSAFE | MNTK_GA_MPSAFE | MNTK_IN_MPSAFE;*/

	/* 
	 * note: f_iosize is used by vnode_pager_haspage() when constructing
	 * its VOP_BMAP call.
	 */
	mp->mnt_stat.f_iosize = HAMMER_BUFSIZE;
	mp->mnt_stat.f_bsize = HAMMER_BUFSIZE;

	mp->mnt_vstat.f_frsize = HAMMER_BUFSIZE;
	mp->mnt_vstat.f_bsize = HAMMER_BUFSIZE;

	mp->mnt_maxsymlinklen = 255;
	mp->mnt_flag |= MNT_LOCAL;

	vfs_add_vnodeops(mp, &hammer_vnode_vops, &mp->mnt_vn_norm_ops);
	vfs_add_vnodeops(mp, &hammer_spec_vops, &mp->mnt_vn_spec_ops);
	vfs_add_vnodeops(mp, &hammer_fifo_vops, &mp->mnt_vn_fifo_ops);

	/*
	 * The root volume's ondisk pointer is only valid if we hold a
	 * reference to it.
	 */
	rootvol = hammer_get_root_volume(hmp, &error);
	if (error)
		goto failed;

	/*
	 * Perform any necessary UNDO operations.  The recovery code does
	 * call hammer_undo_lookup() so we have to pre-cache the blockmap,
	 * and then re-copy it again after recovery is complete.
	 *
	 * If this is a read-only mount the UNDO information is retained
	 * in memory in the form of dirty buffer cache buffers, and not
	 * written back to the media.
	 */
	bcopy(rootvol->ondisk->vol0_blockmap, hmp->blockmap,
	      sizeof(hmp->blockmap));

	/*
	 * Check filesystem version
	 */
	hmp->version = rootvol->ondisk->vol_version;
	if (hmp->version < HAMMER_VOL_VERSION_MIN ||
	    hmp->version > HAMMER_VOL_VERSION_MAX) {
		kprintf("HAMMER: mount unsupported fs version %d\n",
			hmp->version);
		error = ERANGE;
		goto done;
	}

	/*
	 * The undo_rec_limit limits the size of flush groups to avoid
	 * blowing out the UNDO FIFO.  This calculation is typically in
	 * the tens of thousands and is designed primarily when small
	 * HAMMER filesystems are created.
	 */
	hmp->undo_rec_limit = hammer_undo_max(hmp) / 8192 + 100;
	if (hammer_debug_general & 0x0001)
		kprintf("HAMMER: undo_rec_limit %d\n", hmp->undo_rec_limit);

	/*
	 * NOTE: Recover stage1 not only handles meta-data recovery, it
	 * 	 also sets hmp->undo_seqno for HAMMER VERSION 4+ filesystems.
	 */
	error = hammer_recover_stage1(hmp, rootvol);
	if (error) {
		kprintf("Failed to recover HAMMER filesystem on mount\n");
		goto done;
	}

	/*
	 * Finish setup now that we have a good root volume.
	 *
	 * The top 16 bits of fsid.val[1] is a pfs id.
	 */
	ksnprintf(mp->mnt_stat.f_mntfromname,
		  sizeof(mp->mnt_stat.f_mntfromname), "%s",
		  rootvol->ondisk->vol_name);
	mp->mnt_stat.f_fsid.val[0] =
		crc32((char *)&rootvol->ondisk->vol_fsid + 0, 8);
	mp->mnt_stat.f_fsid.val[1] =
		crc32((char *)&rootvol->ondisk->vol_fsid + 8, 8);
	mp->mnt_stat.f_fsid.val[1] &= 0x0000FFFF;

	mp->mnt_vstat.f_fsid_uuid = rootvol->ondisk->vol_fsid;
	mp->mnt_vstat.f_fsid = crc32(&mp->mnt_vstat.f_fsid_uuid,
				     sizeof(mp->mnt_vstat.f_fsid_uuid));

	/*
	 * Certain often-modified fields in the root volume are cached in
	 * the hammer_mount structure so we do not have to generate lots
	 * of little UNDO structures for them.
	 *
	 * Recopy after recovery.  This also has the side effect of
	 * setting our cached undo FIFO's first_offset, which serves to
	 * placemark the FIFO start for the NEXT flush cycle while the
	 * on-disk first_offset represents the LAST flush cycle.
	 */
	hmp->next_tid = rootvol->ondisk->vol0_next_tid;
	hmp->flush_tid1 = hmp->next_tid;
	hmp->flush_tid2 = hmp->next_tid;
	bcopy(rootvol->ondisk->vol0_blockmap, hmp->blockmap,
	      sizeof(hmp->blockmap));
	hmp->copy_stat_freebigblocks = rootvol->ondisk->vol0_stat_freebigblocks;

	hammer_flusher_create(hmp);

	/*
	 * Locate the root directory using the root cluster's B-Tree as a
	 * starting point.  The root directory uses an obj_id of 1.
	 *
	 * FUTURE: Leave the root directory cached referenced but unlocked
	 * in hmp->rootvp (need to flush it on unmount).
	 */
	error = hammer_vfs_vget(mp, NULL, 1, &rootvp);
	if (error)
		goto done;
	vput(rootvp);
	/*vn_unlock(hmp->rootvp);*/
	if (hmp->ronly == 0)
		error = hammer_recover_stage2(hmp, rootvol);

	/*
	 * If the stage2 recovery fails be sure to clean out all cached
	 * vnodes before throwing away the mount structure or bad things
	 * will happen.
	 */
	if (error)
		vflush(mp, 0, 0);

done:
	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
		/* New mount */

		/* Populate info for mount point (NULL pad)*/
		bzero(mp->mnt_stat.f_mntonname, MNAMELEN);
		size_t size;
		if (mntpt) {
			copyinstr(mntpt, mp->mnt_stat.f_mntonname,
							MNAMELEN -1, &size);
		} else { /* Root mount */
			mp->mnt_stat.f_mntonname[0] = '/';
		}
	}
	(void)VFS_STATFS(mp, &mp->mnt_stat, cred);
	hammer_rel_volume(rootvol, 0);
failed:
	/*
	 * Cleanup and return.
	 */
	if (error) {
		/* called with fs_token held */
		hammer_free_hmp(mp);
	} else {
		lwkt_reltoken(&hmp->fs_token);
	}
	return (error);
}
Example #4
0
int
hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip,
		   struct hammer_ioc_reblock *reblock)
{
	struct hammer_cursor cursor;
	hammer_btree_elm_t elm;
	int checkspace_count;
	int error;
	int seq;
	int slop;

	/*
	 * A fill level <= 20% is considered an emergency.  free_level is
	 * inverted from fill_level.
	 */
	if (reblock->free_level >= HAMMER_LARGEBLOCK_SIZE * 8 / 10)
		slop = HAMMER_CHKSPC_EMERGENCY;
	else
		slop = HAMMER_CHKSPC_REBLOCK;

	if ((reblock->key_beg.localization | reblock->key_end.localization) &
	    HAMMER_LOCALIZE_PSEUDOFS_MASK) {
		return(EINVAL);
	}
	if (reblock->key_beg.obj_id >= reblock->key_end.obj_id)
		return(EINVAL);
	if (reblock->free_level < 0)
		return(EINVAL);

	reblock->key_cur = reblock->key_beg;
	reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	reblock->key_cur.localization += ip->obj_localization;

	checkspace_count = 0;
	seq = trans->hmp->flusher.done;
retry:
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		goto failed;
	}
	cursor.key_beg.localization = reblock->key_cur.localization;
	cursor.key_beg.obj_id = reblock->key_cur.obj_id;
	cursor.key_beg.key = HAMMER_MIN_KEY;
	cursor.key_beg.create_tid = 1;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
	cursor.key_beg.obj_type = 0;

	cursor.key_end.localization = (reblock->key_end.localization &
					HAMMER_LOCALIZE_MASK) +
				      ip->obj_localization;
	cursor.key_end.obj_id = reblock->key_end.obj_id;
	cursor.key_end.key = HAMMER_MAX_KEY;
	cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
	cursor.key_end.delete_tid = 0;
	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
	cursor.key_end.obj_type = 0;

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
	cursor.flags |= HAMMER_CURSOR_BACKEND;
	cursor.flags |= HAMMER_CURSOR_NOSWAPCACHE;

	/*
	 * This flag allows the btree scan code to return internal nodes,
	 * so we can reblock them in addition to the leafs.  Only specify it
	 * if we intend to reblock B-Tree nodes.
	 */
	if (reblock->head.flags & HAMMER_IOC_DO_BTREE)
		cursor.flags |= HAMMER_CURSOR_REBLOCKING;

	error = hammer_btree_first(&cursor);
	while (error == 0) {
		/*
		 * Internal or Leaf node
		 */
		KKASSERT(cursor.index < cursor.node->ondisk->count);
		elm = &cursor.node->ondisk->elms[cursor.index];
		reblock->key_cur.obj_id = elm->base.obj_id;
		reblock->key_cur.localization = elm->base.localization;

		/*
		 * Yield to more important tasks
		 */
		if ((error = hammer_signal_check(trans->hmp)) != 0)
			break;

		/*
		 * If there is insufficient free space it may be due to
		 * reserved bigblocks, which flushing might fix.
		 *
		 * We must force a retest in case the unlocked cursor is
		 * moved to the end of the leaf, or moved to an internal
		 * node.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		if (hammer_checkspace(trans->hmp, slop)) {
			if (++checkspace_count == 10) {
				error = ENOSPC;
				break;
			}
			hammer_unlock_cursor(&cursor);
			cursor.flags |= HAMMER_CURSOR_RETEST;
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async(trans->hmp, NULL);
			goto skip;
		}

		/*
		 * Acquiring the sync_lock prevents the operation from
		 * crossing a synchronization boundary.
		 *
		 * NOTE: cursor.node may have changed on return.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		hammer_sync_lock_sh(trans);
		error = hammer_reblock_helper(reblock, &cursor, elm);
		hammer_sync_unlock(trans);

		while (hammer_flusher_meta_halflimit(trans->hmp) ||
		       hammer_flusher_undo_exhausted(trans, 2)) {
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async_one(trans->hmp);
		}

		/*
		 * Setup for iteration, our cursor flags may be modified by
		 * other threads while we are unlocked.
		 */
		cursor.flags |= HAMMER_CURSOR_ATEDISK;

		/*
		 * We allocate data buffers, which atm we don't track
		 * dirty levels for because we allow the kernel to write
		 * them.  But if we allocate too many we can still deadlock
		 * the buffer cache.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 *	    (The cursor's node and element may change!)
		 */
		if (bd_heatup()) {
			hammer_unlock_cursor(&cursor);
			bwillwrite(HAMMER_XBUFSIZE);
			hammer_lock_cursor(&cursor);
		}
		/* XXX vm_wait_nominal(); */
skip:
		if (error == 0) {
			error = hammer_btree_iterate(&cursor);
		}
	}
	if (error == ENOENT)
		error = 0;
	hammer_done_cursor(&cursor);
	if (error == EWOULDBLOCK) {
		hammer_flusher_sync(trans->hmp);
		goto retry;
	}
	if (error == EDEADLK)
		goto retry;
	if (error == EINTR) {
		reblock->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}
failed:
	reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	return(error);
}
/*
 * Remove a volume.
 */
int
hammer_ioc_volume_del(hammer_transaction_t trans, hammer_inode_t ip,
		struct hammer_ioc_volume *ioc)
{
	struct hammer_mount *hmp = trans->hmp;
	struct mount *mp = hmp->mp;
	hammer_volume_t volume;
	int error = 0;

	if (mp->mnt_flag & MNT_RDONLY) {
		kprintf("Cannot del volume from read-only HAMMER filesystem\n");
		return (EINVAL);
	}

	if (hammer_lock_ex_try(&hmp->volume_lock) != 0) {
		kprintf("Another volume operation is in progress!\n");
		return (EAGAIN);
	}

	volume = NULL;

	/*
	 * find volume by volname
	 */
	for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
		volume = hammer_get_volume(hmp, vol_no, &error);
		if (volume == NULL && error == ENOENT) {
			/*
			 * Skip unused volume numbers
			 */
			error = 0;
			continue;
		}
		KKASSERT(volume != NULL && error == 0);
		if (strcmp(volume->vol_name, ioc->device_name) == 0) {
			break;
		}
		hammer_rel_volume(volume, 0);
		volume = NULL;
	}

	if (volume == NULL) {
		kprintf("Couldn't find volume\n");
		error = EINVAL;
		goto end;
	}

	if (volume == trans->rootvol) {
		kprintf("Cannot remove root-volume\n");
		hammer_rel_volume(volume, 0);
		error = EINVAL;
		goto end;
	}

	/*
	 *
	 */

	hmp->volume_to_remove = volume->vol_no;

	struct hammer_ioc_reblock reblock;
	bzero(&reblock, sizeof(reblock));

	reblock.key_beg.localization = HAMMER_MIN_LOCALIZATION;
	reblock.key_beg.obj_id = HAMMER_MIN_OBJID;
	reblock.key_end.localization = HAMMER_MAX_LOCALIZATION;
	reblock.key_end.obj_id = HAMMER_MAX_OBJID;
	reblock.head.flags = HAMMER_IOC_DO_FLAGS;
	reblock.free_level = 0;

	error = hammer_ioc_reblock(trans, ip, &reblock);

	if (reblock.head.flags & HAMMER_IOC_HEAD_INTR) {
		error = EINTR;
	}

	if (error) {
		if (error == EINTR) {
			kprintf("reblock was interrupted\n");
		} else {
			kprintf("reblock failed: %d\n", error);
		}
		hmp->volume_to_remove = -1;
		hammer_rel_volume(volume, 0);
		goto end;
	}

	/*
	 * Sync filesystem
	 */
	int count = 0;
	while (hammer_flusher_haswork(hmp)) {
		hammer_flusher_sync(hmp);
		++count;
		if (count >= 5) {
			if (count == 5)
				kprintf("HAMMER: flushing.");
			else
				kprintf(".");
			tsleep(&count, 0, "hmrufl", hz);
		}
		if (count == 30) {
			kprintf("giving up");
			break;
		}
	}
	kprintf("\n");

	hammer_sync_lock_sh(trans);
	hammer_lock_ex(&hmp->blkmap_lock);

	/*
	 * We use stat later to update rootvol's bigblock stats
	 */
	struct bigblock_stat stat;
	error = hammer_free_freemap(trans, volume, &stat);
	if (error) {
		kprintf("Failed to free volume. Volume not empty!\n");
		hmp->volume_to_remove = -1;
		hammer_rel_volume(volume, 0);
		hammer_unlock(&hmp->blkmap_lock);
		hammer_sync_unlock(trans);
		goto end;
	}

	hmp->volume_to_remove = -1;

	hammer_rel_volume(volume, 0);

	/*
	 * Unload buffers
	 */
        RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
		hammer_unload_buffer, volume);

	error = hammer_unload_volume(volume, NULL);
	if (error == -1) {
		kprintf("Failed to unload volume\n");
		hammer_unlock(&hmp->blkmap_lock);
		hammer_sync_unlock(trans);
		goto end;
	}

	volume = NULL;
	--hmp->nvolumes;

	/*
	 * Set each volume's new value of the vol_count field.
	 */
	for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
		volume = hammer_get_volume(hmp, vol_no, &error);
		if (volume == NULL && error == ENOENT) {
			/*
			 * Skip unused volume numbers
			 */
			error = 0;
			continue;
		}

		KKASSERT(volume != NULL && error == 0);
		hammer_modify_volume_field(trans, volume, vol_count);
		volume->ondisk->vol_count = hmp->nvolumes;
		hammer_modify_volume_done(volume);

		/*
		 * Only changes to the header of the root volume
		 * are automatically flushed to disk. For all
		 * other volumes that we modify we do it here.
		 *
		 * No interlock is needed, volume buffers are not
		 * messed with by bioops.
		 */
		if (volume != trans->rootvol && volume->io.modified) {
			hammer_crc_set_volume(volume->ondisk);
			hammer_io_flush(&volume->io, 0);
		}

		hammer_rel_volume(volume, 0);
	}

	/*
	 * Update the total number of bigblocks
	 */
	hammer_modify_volume_field(trans, trans->rootvol,
		vol0_stat_bigblocks);
	trans->rootvol->ondisk->vol0_stat_bigblocks -= stat.total_bigblocks;
	hammer_modify_volume_done(trans->rootvol);

	/*
	 * Update the number of free bigblocks
	 * (including the copy in hmp)
	 */
	hammer_modify_volume_field(trans, trans->rootvol,
		vol0_stat_freebigblocks);
	trans->rootvol->ondisk->vol0_stat_freebigblocks -= stat.total_free_bigblocks;
	hmp->copy_stat_freebigblocks =
		trans->rootvol->ondisk->vol0_stat_freebigblocks;
	hammer_modify_volume_done(trans->rootvol);
	/*
	 * Bigblock count changed so recompute the total number of blocks.
	 */
	mp->mnt_stat.f_blocks = trans->rootvol->ondisk->vol0_stat_bigblocks *
	    (HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE);
	mp->mnt_vstat.f_blocks = trans->rootvol->ondisk->vol0_stat_bigblocks *
	    (HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE);

	hammer_unlock(&hmp->blkmap_lock);
	hammer_sync_unlock(trans);

	/*
	 * Erase the volume header of the removed device.
	 *
	 * This is to not accidentally mount the volume again.
	 */
	struct vnode *devvp = NULL;
	error = hammer_setup_device(&devvp, ioc->device_name, 0);
	if (error) {
		kprintf("Failed to open device: %s\n", ioc->device_name);
		goto end;
	}
	KKASSERT(devvp);
	error = hammer_clear_volume_header(devvp);
	if (error) {
		kprintf("Failed to clear volume header of device: %s\n",
			ioc->device_name);
		goto end;
	}
	hammer_close_device(&devvp, 0);

	KKASSERT(error == 0);
end:
	hammer_unlock(&hmp->volume_lock);
	return (error);
}