Exemple #1
0
/*
 * Add the device described by *ioc to the mounted HAMMER filesystem
 * referenced by trans->hmp.
 *
 * The filesystem must not be mounted read-only and no other volume
 * operation may be in progress (hmp->volume_lock is try-locked, not
 * waited on).  The lowest unused volume number is chosen, a volume
 * header is formatted and installed, the freemap of the new volume is
 * formatted, and finally the volume headers are updated with the new
 * big-block totals.
 *
 * Returns 0 on success, EINVAL for a read-only mount or when no volume
 * number is available, EAGAIN when the volume lock is busy, or the
 * error reported by hammer_format_volume_header() /
 * hammer_install_volume().  Failures after the volume has been
 * installed are asserted via KKASSERT rather than returned.
 *
 * The ip argument is not used.
 */
int
hammer_ioc_volume_add(hammer_transaction_t trans, hammer_inode_t ip,
		struct hammer_ioc_volume *ioc)
{
	hammer_mount_t hmp = trans->hmp;
	struct mount *mp = hmp->mp;
	struct hammer_volume_ondisk ondisk;
	hammer_volume_t new_vol;
	int64_t nr_total, nr_empty;
	int vol_no;
	int error;

	if (mp->mnt_flag & MNT_RDONLY) {
		hmkprintf(hmp, "Cannot add volume to read-only HAMMER filesystem\n");
		return (EINVAL);
	}

	if (hammer_lock_ex_try(&hmp->volume_lock) != 0) {
		hmkprintf(hmp, "Another volume operation is in progress!\n");
		return (EAGAIN);
	}

	/*
	 * Early capacity check.  This path drops the lock itself and
	 * returns directly, so the generic error message at the exit
	 * label is not printed for it.
	 */
	if (hmp->nvolumes >= HAMMER_MAX_VOLUMES) {
		hammer_unlock(&hmp->volume_lock);
		hmkprintf(hmp, "Max number of HAMMER volumes exceeded\n");
		return (EINVAL);
	}

	/*
	 * Scan upward from 0 for the first volume number that is not
	 * in use.
	 */
	for (vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
		if (!hammer_volume_number_test(hmp, vol_no))
			break;
	}
	if (vol_no == HAMMER_MAX_VOLUMES) {
		hmkprintf(hmp, "Max number of HAMMER volumes exceeded\n");
		error = EINVAL;
		goto unlock;
	}

	/*
	 * Format the header first; install the volume only if that worked.
	 */
	error = hammer_format_volume_header(hmp, ioc, &ondisk, vol_no);
	if (error == 0)
		error = hammer_install_volume(hmp, ioc->device_name, NULL,
					      &ondisk);
	if (error != 0)
		goto unlock;

	/*
	 * From here on any failure is treated as fatal (KKASSERT).
	 */
	hammer_sync_lock_sh(trans);
	hammer_lock_ex(&hmp->blkmap_lock);

	new_vol = hammer_get_volume(hmp, vol_no, &error);
	KKASSERT(new_vol != NULL && error == 0);

	error = hammer_format_freemap(trans, new_vol);
	KKASSERT(error == 0);

	/*
	 * A freshly formatted volume is expected to consist of empty
	 * big-blocks only.
	 */
	error = hammer_count_bigblocks(hmp, new_vol, &nr_total, &nr_empty);
	KKASSERT(error == 0);
	KKASSERT(nr_total == nr_empty);

	hammer_rel_volume(new_vol, 0);

	/*
	 * Account for the new volume before the headers are rewritten.
	 */
	++hmp->nvolumes;
	error = hammer_update_volumes_header(trans, nr_total, nr_empty);
	KKASSERT(error == 0);

	hammer_unlock(&hmp->blkmap_lock);
	hammer_sync_unlock(trans);

	KKASSERT(error == 0);
unlock:
	hammer_unlock(&hmp->volume_lock);
	if (error)
		hmkprintf(hmp, "An error occurred: %d\n", error);
	return (error);
}
/*
 * Mount a HAMMER filesystem, or update an existing HAMMER mount.
 *
 * mp	 - mount structure being set up (or updated when MNT_UPDATE is set)
 * mntpt - userspace mount point path, or NULL for a root mount at boot
 * data	 - userspace pointer to a struct hammer_mount_info (not read for
 *	   root mounts)
 * cred	 - credentials, only passed through to VFS_STATFS()
 *
 * For a root mount the volume list is taken from the ':' separated
 * device names in mp->mnt_stat.f_mntfromname; otherwise the volume
 * names are copied in from the array referenced by info.volumes.
 *
 * With MNT_UPDATE the function only handles read-only <-> read-write
 * transitions on the existing hammer_mount and returns.  For a new
 * mount it allocates and initializes the hammer_mount, installs all
 * volumes, runs stage1 recovery, starts the flusher, looks up the root
 * vnode and (for read-write mounts) runs stage2 recovery.
 *
 * Returns 0 on success or an errno value.  When a new mount fails after
 * the hammer_mount has been set up, hammer_free_hmp() is called on it.
 */
static int
hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data,
		 struct ucred *cred)
{
	struct hammer_mount_info info;
	hammer_mount_t hmp;
	hammer_volume_t rootvol;
	struct vnode *rootvp;
	struct vnode *devvp = NULL;
	const char *upath;	/* volume name in userspace */
	char *path;		/* volume name in system space */
	int error;
	int i;
	int master_id;
	char *next_volume_ptr = NULL;	/* parse cursor, root mounts only */

	/*
	 * Accept hammer_mount_info.  mntpt is NULL for root mounts at boot.
	 */
	if (mntpt == NULL) {
		/*
		 * Root mount: there is no userspace info structure, so
		 * synthesize one.
		 */
		bzero(&info, sizeof(info));
		info.asof = 0;
		info.hflags = 0;
		info.nvolumes = 1;

		next_volume_ptr = mp->mnt_stat.f_mntfromname;

		/* Count number of volumes separated by ':' */
		for (char *p = next_volume_ptr; *p != '\0'; ++p) {
			if (*p == ':') {
				++info.nvolumes;
			}
		}

		mp->mnt_flag &= ~MNT_RDONLY; /* mount R/W */
	} else {
		if ((error = copyin(data, &info, sizeof(info))) != 0)
			return (error);
	}

	/*
	 * updating or new mount
	 *
	 * An update reuses the hammer_mount already attached to mp.  The
	 * volume count is only validated for a new mount, the only case
	 * in which the volume list is walked below.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		hmp = (void *)mp->mnt_data;
		KKASSERT(hmp != NULL);
	} else {
		if (info.nvolumes <= 0 || info.nvolumes > HAMMER_MAX_VOLUMES)
			return (EINVAL);
		hmp = NULL;
	}

	/*
	 * master-id validation.  The master id may not be changed by a
	 * mount update.
	 *
	 * Without HMNT_MASTERID an update keeps the current id and a new
	 * mount defaults to 0.
	 */
	if (info.hflags & HMNT_MASTERID) {
		if (hmp && hmp->master_id != info.master_id) {
			kprintf("hammer: cannot change master id "
				"with mount update\n");
			return(EINVAL);
		}
		master_id = info.master_id;
		if (master_id < -1 || master_id >= HAMMER_MAX_MASTERS)
			return (EINVAL);
	} else {
		if (hmp)
			master_id = hmp->master_id;
		else
			master_id = 0;
	}

	/*
	 * Internal mount data structure
	 *
	 * Only allocated for a new mount.  M_ZERO leaves every field not
	 * explicitly initialized below at zero.
	 */
	if (hmp == NULL) {
		hmp = kmalloc(sizeof(*hmp), M_HAMMER, M_WAITOK | M_ZERO);
		mp->mnt_data = (qaddr_t)hmp;
		hmp->mp = mp;
		/*TAILQ_INIT(&hmp->recycle_list);*/

		/*
		 * Make sure kmalloc type limits are set appropriately.
		 *
		 * Our inode kmalloc group is sized based on maxvnodes
		 * (controlled by the system, not us).
		 */
		kmalloc_create(&hmp->m_misc, "HAMMER-others");
		kmalloc_create(&hmp->m_inodes, "HAMMER-inodes");

		kmalloc_raise_limit(hmp->m_inodes, 0);	/* unlimited */

		/*
		 * Smallest and largest B-Tree element bounds: each field
		 * of root_btree_beg / root_btree_end is set to the minimum
		 * or maximum value of its type, with the exceptions noted.
		 */
		hmp->root_btree_beg.localization = 0x00000000U;
		hmp->root_btree_beg.obj_id = -0x8000000000000000LL;
		hmp->root_btree_beg.key = -0x8000000000000000LL;
		hmp->root_btree_beg.create_tid = 1;
		hmp->root_btree_beg.delete_tid = 1;
		hmp->root_btree_beg.rec_type = 0;
		hmp->root_btree_beg.obj_type = 0;

		hmp->root_btree_end.localization = 0xFFFFFFFFU;
		hmp->root_btree_end.obj_id = 0x7FFFFFFFFFFFFFFFLL;
		hmp->root_btree_end.key = 0x7FFFFFFFFFFFFFFFLL;
		hmp->root_btree_end.create_tid = 0xFFFFFFFFFFFFFFFFULL;
		hmp->root_btree_end.delete_tid = 0;   /* special case */
		hmp->root_btree_end.rec_type = 0xFFFFU;
		hmp->root_btree_end.obj_type = 0;

		hmp->krate.freq = 1;	/* maximum reporting rate (hz) */
		hmp->krate.count = -16;	/* initial burst */

		/*
		 * NOTE(review): the refs fields of the embedded locks are
		 * seeded with 1 here; the meaning of that initial count is
		 * defined by the hammer lock code - confirm there.
		 */
		hmp->sync_lock.refs = 1;
		hmp->free_lock.refs = 1;
		hmp->undo_lock.refs = 1;
		hmp->blkmap_lock.refs = 1;
		hmp->snapshot_lock.refs = 1;
		hmp->volume_lock.refs = 1;

		TAILQ_INIT(&hmp->delay_list);
		TAILQ_INIT(&hmp->flush_group_list);
		TAILQ_INIT(&hmp->objid_cache_list);
		TAILQ_INIT(&hmp->undo_lru_list);
		TAILQ_INIT(&hmp->reclaim_list);

		RB_INIT(&hmp->rb_dedup_crc_root);
		RB_INIT(&hmp->rb_dedup_off_root);	
		TAILQ_INIT(&hmp->dedup_lru_list);
	}

	/*
	 * The settings below are applied for both new mounts and updates:
	 * replace the user-controllable flag bits, leaving all other
	 * hflags bits alone.
	 */
	hmp->hflags &= ~HMNT_USERFLAGS;
	hmp->hflags |= info.hflags & HMNT_USERFLAGS;

	hmp->master_id = master_id;

	/*
	 * A non-zero as-of TID forces the mount read-only.
	 */
	if (info.asof) {
		mp->mnt_flag |= MNT_RDONLY;
		hmp->asof = info.asof;
	} else {
		hmp->asof = HAMMER_MAX_TID;
	}

	/* -1: no volume removal pending (presumably; see the remove ioctl) */
	hmp->volume_to_remove = -1;

	/*
	 * Re-open read-write if originally read-only, or vice versa.
	 *
	 * When going from read-only to read-write execute the stage2
	 * recovery if it has not already been run.
	 *
	 * The update path always returns from inside this block; nothing
	 * below it runs for MNT_UPDATE.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		lwkt_gettoken(&hmp->fs_token);
		error = 0;
		if (hmp->ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
			kprintf("HAMMER read-only -> read-write\n");
			hmp->ronly = 0;
			RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
				hammer_adjust_volume_mode, NULL);
			/*
			 * If the root volume cannot be obtained, error is
			 * whatever hammer_get_root_volume() stored and the
			 * recovery step is skipped.
			 */
			rootvol = hammer_get_root_volume(hmp, &error);
			if (rootvol) {
				hammer_recover_flush_buffers(hmp, rootvol, 1);
				error = hammer_recover_stage2(hmp, rootvol);
				/* refresh the cached blockmap after recovery */
				bcopy(rootvol->ondisk->vol0_blockmap,
				      hmp->blockmap,
				      sizeof(hmp->blockmap));
				hammer_rel_volume(rootvol, 0);
			}
			RB_SCAN(hammer_ino_rb_tree, &hmp->rb_inos_root, NULL,
				hammer_reload_inode, NULL);
			/* kernel clears MNT_RDONLY */
		} else if (hmp->ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			kprintf("HAMMER read-write -> read-only\n");
			/*
			 * ronly is toggled on purpose: it is set while the
			 * inodes are reloaded, cleared again for the three
			 * flusher syncs, and only then set for good before
			 * the volumes are switched over.
			 */
			hmp->ronly = 1;	/* messy */
			RB_SCAN(hammer_ino_rb_tree, &hmp->rb_inos_root, NULL,
				hammer_reload_inode, NULL);
			hmp->ronly = 0;
			hammer_flusher_sync(hmp);
			hammer_flusher_sync(hmp);
			hammer_flusher_sync(hmp);
			hmp->ronly = 1;
			RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
				hammer_adjust_volume_mode, NULL);
		}
		lwkt_reltoken(&hmp->fs_token);
		return(error);
	}

	/*
	 * New mount only from here on: initialize the remaining trees,
	 * lists and tokens of the freshly allocated hammer_mount.
	 */
	RB_INIT(&hmp->rb_vols_root);
	RB_INIT(&hmp->rb_inos_root);
	RB_INIT(&hmp->rb_redo_root);
	RB_INIT(&hmp->rb_nods_root);
	RB_INIT(&hmp->rb_undo_root);
	RB_INIT(&hmp->rb_resv_root);
	RB_INIT(&hmp->rb_bufs_root);
	RB_INIT(&hmp->rb_pfsm_root);

	hmp->ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);

	RB_INIT(&hmp->volu_root);
	RB_INIT(&hmp->undo_root);
	RB_INIT(&hmp->data_root);
	RB_INIT(&hmp->meta_root);
	RB_INIT(&hmp->lose_root);
	TAILQ_INIT(&hmp->iorun_list);

	lwkt_token_init(&hmp->fs_token, "hammerfs");
	lwkt_token_init(&hmp->io_token, "hammerio");

	/*
	 * fs_token stays held until the end of the function; it is
	 * released at the bottom on success.
	 */
	lwkt_gettoken(&hmp->fs_token);

	/*
	 * Load volumes
	 *
	 * NOTE(review): nvolumes starts at -1 here; presumably it is
	 * filled in by hammer_install_volume() from the volume headers -
	 * confirm there.
	 */
	path = objcache_get(namei_oc, M_WAITOK);
	hmp->nvolumes = -1;
	for (i = 0; i < info.nvolumes; ++i) {
		if (mntpt == NULL) {
			/*
			 * Root mount.
			 *
			 * Extract the next ':' separated device name into
			 * path, prefixing "/dev/" unless the name starts
			 * with '/'.  The copy is bounded by MAXPATHLEN-1 so
			 * the terminator written below always fits.
			 */
			KKASSERT(next_volume_ptr != NULL);
			strcpy(path, "");
			if (*next_volume_ptr != '/') {
				/* relative path */
				strcpy(path, "/dev/");
			}
			int k;
			for (k = strlen(path); k < MAXPATHLEN-1; ++k) {
				if (*next_volume_ptr == '\0') {
					break;
				} else if (*next_volume_ptr == ':') {
					/* skip the separator for the next pass */
					++next_volume_ptr;
					break;
				} else {
					path[k] = *next_volume_ptr;
					++next_volume_ptr;
				}
			}
			path[k] = '\0';

			/*
			 * Resolve the device name to a vnode.
			 * NOTE(review): the result of kgetdiskbyname() is
			 * passed to bdevvp() unchecked; this relies on
			 * bdevvp() rejecting a NULL device - confirm.
			 */
			error = 0;
			cdev_t dev = kgetdiskbyname(path);
			error = bdevvp(dev, &devvp);
			if (error) {
				kprintf("hammer_mountroot: can't find devvp\n");
			}
		} else {
			/*
			 * Regular mount: fetch the i'th userspace string
			 * pointer, then copy the string itself.  devvp
			 * stays NULL in this case.
			 */
			error = copyin(&info.volumes[i], &upath,
				       sizeof(char *));
			if (error == 0)
				error = copyinstr(upath, path,
						  MAXPATHLEN, NULL);
		}
		if (error == 0)
			error = hammer_install_volume(hmp, path, devvp);
		if (error)
			break;
	}
	objcache_put(namei_oc, path);

	/*
	 * Make sure we found a root volume
	 */
	if (error == 0 && hmp->rootvol == NULL) {
		kprintf("hammer_mount: No root volume found!\n");
		error = EINVAL;
	}

	/*
	 * Check that all required volumes are available
	 */
	if (error == 0 && hammer_mountcheck_volumes(hmp)) {
		kprintf("hammer_mount: Missing volumes, cannot mount!\n");
		error = EINVAL;
	}

	if (error) {
		/* called with fs_token held */
		hammer_free_hmp(mp);
		return (error);
	}

	/*
	 * No errors, setup enough of the mount point so we can lookup the
	 * root vnode.
	 */
	mp->mnt_iosize_max = MAXPHYS;
	mp->mnt_kern_flag |= MNTK_FSMID;
	mp->mnt_kern_flag |= MNTK_THR_SYNC;	/* new vsyncscan semantics */

	/*
	 * MPSAFE code.  Note that VOPs and VFSops which are not MPSAFE
	 * will acquire a per-mount token prior to entry and release it
	 * on return, so even if we do not specify it we no longer get
	 * the BGL regardless of how we are flagged.
	 */
	mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;
	/*MNTK_RD_MPSAFE | MNTK_GA_MPSAFE | MNTK_IN_MPSAFE;*/

	/*
	 * note: f_iosize is used by vnode_pager_haspage() when constructing
	 * its VOP_BMAP call.
	 */
	mp->mnt_stat.f_iosize = HAMMER_BUFSIZE;
	mp->mnt_stat.f_bsize = HAMMER_BUFSIZE;

	mp->mnt_vstat.f_frsize = HAMMER_BUFSIZE;
	mp->mnt_vstat.f_bsize = HAMMER_BUFSIZE;

	mp->mnt_maxsymlinklen = 255;
	mp->mnt_flag |= MNT_LOCAL;

	vfs_add_vnodeops(mp, &hammer_vnode_vops, &mp->mnt_vn_norm_ops);
	vfs_add_vnodeops(mp, &hammer_spec_vops, &mp->mnt_vn_spec_ops);
	vfs_add_vnodeops(mp, &hammer_fifo_vops, &mp->mnt_vn_fifo_ops);

	/*
	 * The root volume's ondisk pointer is only valid if we hold a
	 * reference to it.
	 *
	 * On failure jump to "failed" rather than "done": the "done" path
	 * releases rootvol, which was not obtained here.
	 */
	rootvol = hammer_get_root_volume(hmp, &error);
	if (error)
		goto failed;

	/*
	 * Perform any necessary UNDO operations.  The recovery code does
	 * call hammer_undo_lookup() so we have to pre-cache the blockmap,
	 * and then re-copy it again after recovery is complete.
	 *
	 * If this is a read-only mount the UNDO information is retained
	 * in memory in the form of dirty buffer cache buffers, and not
	 * written back to the media.
	 */
	bcopy(rootvol->ondisk->vol0_blockmap, hmp->blockmap,
	      sizeof(hmp->blockmap));

	/*
	 * Check filesystem version
	 */
	hmp->version = rootvol->ondisk->vol_version;
	if (hmp->version < HAMMER_VOL_VERSION_MIN ||
	    hmp->version > HAMMER_VOL_VERSION_MAX) {
		kprintf("HAMMER: mount unsupported fs version %d\n",
			hmp->version);
		error = ERANGE;
		goto done;
	}

	/*
	 * The undo_rec_limit limits the size of flush groups to avoid
	 * blowing out the UNDO FIFO.  This calculation is typically in
	 * the tens of thousands and is designed primarily when small
	 * HAMMER filesystems are created.
	 */
	hmp->undo_rec_limit = hammer_undo_max(hmp) / 8192 + 100;
	if (hammer_debug_general & 0x0001)
		kprintf("HAMMER: undo_rec_limit %d\n", hmp->undo_rec_limit);

	/*
	 * NOTE: Recover stage1 not only handles meta-data recovery, it
	 * 	 also sets hmp->undo_seqno for HAMMER VERSION 4+ filesystems.
	 */
	error = hammer_recover_stage1(hmp, rootvol);
	if (error) {
		kprintf("Failed to recover HAMMER filesystem on mount\n");
		goto done;
	}

	/*
	 * Finish setup now that we have a good root volume.
	 *
	 * The top 16 bits of fsid.val[1] is a pfs id.
	 *
	 * The two fsid words are CRCs over the first and second 8 bytes
	 * of the volume's fsid; the mask below clears the top 16 bits of
	 * val[1].
	 */
	ksnprintf(mp->mnt_stat.f_mntfromname,
		  sizeof(mp->mnt_stat.f_mntfromname), "%s",
		  rootvol->ondisk->vol_name);
	mp->mnt_stat.f_fsid.val[0] =
		crc32((char *)&rootvol->ondisk->vol_fsid + 0, 8);
	mp->mnt_stat.f_fsid.val[1] =
		crc32((char *)&rootvol->ondisk->vol_fsid + 8, 8);
	mp->mnt_stat.f_fsid.val[1] &= 0x0000FFFF;

	mp->mnt_vstat.f_fsid_uuid = rootvol->ondisk->vol_fsid;
	mp->mnt_vstat.f_fsid = crc32(&mp->mnt_vstat.f_fsid_uuid,
				     sizeof(mp->mnt_vstat.f_fsid_uuid));

	/*
	 * Certain often-modified fields in the root volume are cached in
	 * the hammer_mount structure so we do not have to generate lots
	 * of little UNDO structures for them.
	 *
	 * Recopy after recovery.  This also has the side effect of
	 * setting our cached undo FIFO's first_offset, which serves to
	 * placemark the FIFO start for the NEXT flush cycle while the
	 * on-disk first_offset represents the LAST flush cycle.
	 */
	hmp->next_tid = rootvol->ondisk->vol0_next_tid;
	hmp->flush_tid1 = hmp->next_tid;
	hmp->flush_tid2 = hmp->next_tid;
	bcopy(rootvol->ondisk->vol0_blockmap, hmp->blockmap,
	      sizeof(hmp->blockmap));
	hmp->copy_stat_freebigblocks = rootvol->ondisk->vol0_stat_freebigblocks;

	hammer_flusher_create(hmp);

	/*
	 * Locate the root directory using the root cluster's B-Tree as a
	 * starting point.  The root directory uses an obj_id of 1.
	 *
	 * FUTURE: Leave the root directory cached referenced but unlocked
	 * in hmp->rootvp (need to flush it on unmount).
	 */
	error = hammer_vfs_vget(mp, NULL, 1, &rootvp);
	if (error)
		goto done;
	vput(rootvp);
	/*vn_unlock(hmp->rootvp);*/
	/* stage2 recovery is only run for read-write mounts */
	if (hmp->ronly == 0)
		error = hammer_recover_stage2(hmp, rootvol);

	/*
	 * If the stage2 recovery fails be sure to clean out all cached
	 * vnodes before throwing away the mount structure or bad things
	 * will happen.
	 */
	if (error)
		vflush(mp, 0, 0);

done:
	/*
	 * Reached on success and on every failure that occurs while the
	 * rootvol reference is held; error may be non-zero here.
	 */
	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
		/* New mount */

		/* Populate info for mount point (NULL pad)*/
		bzero(mp->mnt_stat.f_mntonname, MNAMELEN);
		size_t size;
		if (mntpt) {
			/*
			 * NOTE(review): the copyinstr() result is ignored;
			 * on failure the name is left as whatever was
			 * copied into the zeroed buffer.
			 */
			copyinstr(mntpt, mp->mnt_stat.f_mntonname,
							MNAMELEN -1, &size);
		} else { /* Root mount */
			mp->mnt_stat.f_mntonname[0] = '/';
		}
	}
	(void)VFS_STATFS(mp, &mp->mnt_stat, cred);
	hammer_rel_volume(rootvol, 0);
failed:
	/*
	 * Cleanup and return.
	 *
	 * On success only the fs_token is dropped.  On error the mount is
	 * torn down with hammer_free_hmp(); no explicit token release is
	 * done on that path (presumably handled by hammer_free_hmp() -
	 * confirm).
	 */
	if (error) {
		/* called with fs_token held */
		hammer_free_hmp(mp);
	} else {
		lwkt_reltoken(&hmp->fs_token);
	}
	return (error);
}
/*
 * Add the device named by ioc->device_name to the mounted HAMMER
 * filesystem referenced by trans->hmp.
 *
 * Steps:
 *   1. Refuse read-only mounts and concurrent volume operations
 *	(hmp->volume_lock is try-locked, not waited on).
 *   2. Pick the lowest unused volume number.
 *   3. Format a volume header on the device and install the volume.
 *   4. Store the new volume count in every volume header.
 *   5. Format the freemap of the new volume and add its big-block
 *	counts to the totals kept in the root volume, hmp and the
 *	mount statistics.
 *
 * Returns 0 on success, EINVAL for a read-only mount or when the
 * filesystem already has HAMMER_MAX_VOLUMES volumes, EAGAIN when the
 * volume lock is busy, or the error reported while setting up,
 * formatting or installing the device.  Failures after the volume has
 * been installed are asserted via KKASSERT rather than returned.
 *
 * The ip argument is not used.
 */
int
hammer_ioc_volume_add(hammer_transaction_t trans, hammer_inode_t ip,
		struct hammer_ioc_volume *ioc)
{
	struct hammer_mount *hmp = trans->hmp;
	struct mount *mp = hmp->mp;
	hammer_volume_t volume;
	int error;

	if (mp->mnt_flag & MNT_RDONLY) {
		kprintf("Cannot add volume to read-only HAMMER filesystem\n");
		return (EINVAL);
	}

	if (hammer_lock_ex_try(&hmp->volume_lock) != 0) {
		kprintf("Another volume operation is in progress!\n");
		return (EAGAIN);
	}

	/*
	 * Volume numbers run from 0 to HAMMER_MAX_VOLUMES - 1, so another
	 * volume fits as long as the current count is below the limit.
	 * The count is examined with the volume lock held so it cannot
	 * change under us.  (The previous test, nvolumes + 1 >= limit,
	 * was off by one and refused the last available slot.)
	 */
	if (hmp->nvolumes >= HAMMER_MAX_VOLUMES) {
		hammer_unlock(&hmp->volume_lock);
		kprintf("Max number of HAMMER volumes exceeded\n");
		return (EINVAL);
	}

	/*
	 * Find an unused volume number.
	 */
	int free_vol_no = 0;
	while (free_vol_no < HAMMER_MAX_VOLUMES &&
	       RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, free_vol_no)) {
		++free_vol_no;
	}
	if (free_vol_no >= HAMMER_MAX_VOLUMES) {
		kprintf("Max number of HAMMER volumes exceeded\n");
		hammer_unlock(&hmp->volume_lock);
		return (EINVAL);
	}

	/*
	 * Open the device just long enough to write the new volume
	 * header; it is closed again before the error check so that it
	 * is released on both the success and the failure path.
	 */
	struct vnode *devvp = NULL;
	error = hammer_setup_device(&devvp, ioc->device_name, 0);
	if (error)
		goto end;
	KKASSERT(devvp);
	error = hammer_format_volume_header(
		hmp,
		devvp,
		hmp->rootvol->ondisk->vol_name,
		free_vol_no,
		hmp->nvolumes+1,
		ioc->vol_size,
		ioc->boot_area_size,
		ioc->mem_area_size);
	hammer_close_device(&devvp, 0);
	if (error)
		goto end;

	error = hammer_install_volume(hmp, ioc->device_name, NULL);
	if (error)
		goto end;

	/*
	 * From here on any failure is treated as fatal (KKASSERT).
	 */
	hammer_sync_lock_sh(trans);
	hammer_lock_ex(&hmp->blkmap_lock);

	++hmp->nvolumes;

	/*
	 * Set each volumes new value of the vol_count field.
	 */
	for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
		volume = hammer_get_volume(hmp, vol_no, &error);
		if (volume == NULL && error == ENOENT) {
			/*
			 * Skip unused volume numbers
			 */
			error = 0;
			continue;
		}
		KKASSERT(volume != NULL && error == 0);
		hammer_modify_volume_field(trans, volume, vol_count);
		volume->ondisk->vol_count = hmp->nvolumes;
		hammer_modify_volume_done(volume);

		/*
		 * Only changes to the header of the root volume
		 * are automatically flushed to disk. For all
		 * other volumes that we modify we do it here.
		 *
		 * No interlock is needed, volume buffers are not
		 * messed with by bioops.
		 */
		if (volume != trans->rootvol && volume->io.modified) {
			hammer_crc_set_volume(volume->ondisk);
			hammer_io_flush(&volume->io, 0);
		}

		hammer_rel_volume(volume, 0);
	}

	volume = hammer_get_volume(hmp, free_vol_no, &error);
	KKASSERT(volume != NULL && error == 0);

	/*
	 * Format the freemap of the new volume; stat receives the
	 * big-block counts that are added to the totals below.
	 */
	struct bigblock_stat stat;
	error =	hammer_format_freemap(trans, volume, &stat);
	KKASSERT(error == 0);

	/*
	 * Increase the total number of bigblocks and update stat/vstat totals.
	 */
	hammer_modify_volume_field(trans, trans->rootvol,
		vol0_stat_bigblocks);
	trans->rootvol->ondisk->vol0_stat_bigblocks += stat.total_bigblocks;
	hammer_modify_volume_done(trans->rootvol);
	/*
	 * Bigblock count changed so recompute the total number of blocks.
	 */
	mp->mnt_stat.f_blocks = trans->rootvol->ondisk->vol0_stat_bigblocks *
	    (HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE);
	mp->mnt_vstat.f_blocks = trans->rootvol->ondisk->vol0_stat_bigblocks *
	    (HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE);

	/*
	 * Increase the number of free bigblocks
	 * (including the copy in hmp)
	 */
	hammer_modify_volume_field(trans, trans->rootvol,
		vol0_stat_freebigblocks);
	trans->rootvol->ondisk->vol0_stat_freebigblocks += stat.total_free_bigblocks;
	hmp->copy_stat_freebigblocks =
		trans->rootvol->ondisk->vol0_stat_freebigblocks;
	hammer_modify_volume_done(trans->rootvol);

	hammer_rel_volume(volume, 0);

	hammer_unlock(&hmp->blkmap_lock);
	hammer_sync_unlock(trans);

	KKASSERT(error == 0);
end:
	/*
	 * Common exit for every path that still holds the volume lock.
	 */
	hammer_unlock(&hmp->volume_lock);
	if (error)
		kprintf("An error occurred: %d\n", error);
	return (error);
}