Example #1
/*
 * Destroy a PFS
 *
 * We can destroy a PFS by scanning and deleting all of its records in the
 * B-Tree.  The hammer utility will delete the softlink in the primary
 * filesystem.
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
 */
int
hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_pseudofs_rw *pfs)
{
	hammer_pseudofs_inmem_t pfsm;
	uint32_t localization;
	int error;

	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
		return(error);
	localization = pfs_to_lo(pfs->pfs_id);

	if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
		return(error);

	pfsm = hammer_load_pseudofs(trans, localization, &error);
	if (error == 0) {
		error = hammer_pfs_rollback(trans, pfsm, 0);
		if (error == 0) {
			pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED;
			error = hammer_save_pseudofs(trans, pfsm);
		}
	}
	hammer_rel_pseudofs(trans->hmp, pfsm);
	if (error == EINTR) {
		pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}
	return(error);
}
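
The userland half of this operation is the hammer utility's pfs-destroy directive. Below is a minimal sketch of such a caller, assuming the HAMMERIOC_RMR_PSEUDOFS ioctl and struct hammer_ioc_pseudofs_rw from <vfs/hammer/hammer_ioctl.h>; pfs_destroy() is a hypothetical helper and error handling is reduced to perror() for brevity.

/*
 * Hedged userland sketch: destroy the PFS with the given id.  fd may be
 * any file or directory on the HAMMER filesystem; per the NOTE above,
 * only pfs_id matters.
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <vfs/hammer/hammer_ioctl.h>
#include <stdio.h>
#include <strings.h>

static int
pfs_destroy(int fd, int pfs_id)
{
	struct hammer_ioc_pseudofs_rw pfs;
	struct hammer_pseudofs_data pfsd;

	bzero(&pfs, sizeof(pfs));
	bzero(&pfsd, sizeof(pfsd));
	pfs.pfs_id = pfs_id;
	pfs.ondisk = &pfsd;
	pfs.bytes = sizeof(pfsd);
	pfs.version = HAMMER_IOC_PSEUDOFS_VERSION;
	if (ioctl(fd, HAMMERIOC_RMR_PSEUDOFS, &pfs) < 0) {
		perror("HAMMERIOC_RMR_PSEUDOFS");
		return(-1);
	}
	/* EINTR is folded into HEAD_INTR by the kernel code above */
	if (pfs.head.flags & HAMMER_IOC_HEAD_INTR)
		fprintf(stderr, "pfs-destroy interrupted, re-run to resume\n");
	return(0);
}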
Example #2
/*
 * Downgrade a master to a slave
 *
 * This is really easy to do: just set the SLAVE flag and update sync_end_tid.
 *
 * We previously did not update sync_end_tid, to accommodate a slave
 * that had been upgraded to a master and then downgraded again, but
 * that completely breaks the case where one starts with a master,
 * downgrades to a slave, and then upgrades again.
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
 */
int
hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_pseudofs_rw *pfs)
{
	hammer_mount_t hmp = trans->hmp;
	hammer_pseudofs_inmem_t pfsm;
	uint32_t localization;
	int error;

	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
		return(error);
	localization = pfs_to_lo(pfs->pfs_id);
	if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
		return(error);

	pfsm = hammer_load_pseudofs(trans, localization, &error);
	if (error == 0) {
		if (hammer_is_pfs_master(&pfsm->pfsd)) {
			pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE;
			if (pfsm->pfsd.sync_end_tid < hmp->flush_tid1)
				pfsm->pfsd.sync_end_tid = hmp->flush_tid1;
			error = hammer_save_pseudofs(trans, pfsm);
		}
	}
	hammer_rel_pseudofs(trans->hmp, pfsm);
	return (error);
}
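
Every function in this section converts a PFS id to a localization value with pfs_to_lo(). The sketch below illustrates the layout the NOTE comments describe, with the PFS id occupying the upper 16 bits of the localization field; the real pfs_to_lo()/lo_to_pfs() are macros in the HAMMER headers, so the definitions here are stand-ins for illustration only.

#include <stdint.h>

/* Illustrative stand-ins for the HAMMER pfs_to_lo()/lo_to_pfs() macros. */
static inline uint32_t
example_pfs_to_lo(int pfs_id)
{
	/* PFS id in the upper 16 bits; the low bits select the object space */
	return ((uint32_t)pfs_id << 16);
}

static inline int
example_lo_to_pfs(uint32_t localization)
{
	return ((int)(localization >> 16));
}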
Example #3
/*
 * Upgrade a slave to a master
 *
 * This is fairly easy to do, but we must physically undo any partial
 * syncs for transaction ids > sync_end_tid.  Effectively, we must do a
 * partial rollback.
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
 */
int
hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_pseudofs_rw *pfs)
{
	hammer_pseudofs_inmem_t pfsm;
	uint32_t localization;
	int error;

	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
		return(error);
	localization = pfs_to_lo(pfs->pfs_id);
	if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
		return(error);

	/*
	 * A master id must be set when upgrading
	 */
	pfsm = hammer_load_pseudofs(trans, localization, &error);
	if (error == 0) {
		if (hammer_is_pfs_slave(&pfsm->pfsd)) {
			error = hammer_pfs_rollback(trans, pfsm,
					    pfsm->pfsd.sync_end_tid + 1);
			if (error == 0) {
				pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE;
				error = hammer_save_pseudofs(trans, pfsm);
			}
		}
	}
	hammer_rel_pseudofs(trans->hmp, pfsm);
	if (error == EINTR) {
		pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}
	return (error);
}
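
From userland the downgrade and upgrade paths are plain ioctl calls. A hedged sketch follows: pfs_set_role() is a hypothetical helper, and the ioctl names HAMMERIOC_UPG_PSEUDOFS/HAMMERIOC_DGD_PSEUDOFS are assumed from the DragonFly headers.

#include <sys/types.h>
#include <sys/ioctl.h>
#include <vfs/hammer/hammer_ioctl.h>
#include <strings.h>

/*
 * upgrade != 0: slave -> master (may perform a partial rollback and can
 * therefore be interrupted); upgrade == 0: master -> slave.
 */
static int
pfs_set_role(int fd, int pfs_id, int upgrade)
{
	struct hammer_ioc_pseudofs_rw pfs;
	struct hammer_pseudofs_data pfsd;

	bzero(&pfs, sizeof(pfs));
	bzero(&pfsd, sizeof(pfsd));
	pfs.pfs_id = pfs_id;
	pfs.ondisk = &pfsd;
	pfs.bytes = sizeof(pfsd);
	pfs.version = HAMMER_IOC_PSEUDOFS_VERSION;
	if (ioctl(fd, upgrade ? HAMMERIOC_UPG_PSEUDOFS
			      : HAMMERIOC_DGD_PSEUDOFS, &pfs) < 0)
		return(-1);
	/* an interrupted rollback reports HEAD_INTR; re-run to resume */
	return((pfs.head.flags & HAMMER_IOC_HEAD_INTR) ? 1 : 0);
}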
Example #4
/*
 * Iterate PFS ondisk data.
 * This function does essentially the same thing as hammer_load_pseudofs(),
 * except that it only retrieves the PFS data, without touching
 * hammer_pfs_rb_tree at all.
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
 *
 * NOTE: The API was changed in DragonFly 4.7 due to design issues in
 * this ioctl and in libhammer (the only caller of this ioctl within the
 * DragonFly source, and no longer maintained by anyone).
 */
int
hammer_ioc_scan_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_pseudofs_rw *pfs)
{
	struct hammer_cursor cursor;
	hammer_inode_t dip;
	uint32_t localization;
	int error;

	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
		return(error);
	localization = pfs_to_lo(pfs->pfs_id);
	pfs->bytes = sizeof(struct hammer_pseudofs_data);
	pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;

	dip = hammer_get_inode(trans, NULL, HAMMER_OBJID_ROOT, HAMMER_MAX_TID,
		HAMMER_DEF_LOCALIZATION, 0, &error);

	error = hammer_init_cursor(trans, &cursor,
		(dip ? &dip->cache[1] : NULL), dip);
	if (error)
		goto fail;

	cursor.key_beg.localization = HAMMER_DEF_LOCALIZATION |
				      HAMMER_LOCALIZE_MISC;
	cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_PFS;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.key = localization;
	cursor.asof = HAMMER_MAX_TID;
	cursor.flags |= HAMMER_CURSOR_ASOF;

	error = hammer_ip_lookup(&cursor);
	if (error == 0) {
		error = hammer_ip_resolve_data(&cursor);
		if (error == 0) {
			if (pfs->ondisk)
				error = copyout(cursor.data, pfs->ondisk,
						cursor.leaf->data_len);
			localization = cursor.leaf->base.key;
			pfs->pfs_id = lo_to_pfs(localization);
		}
	}
	hammer_done_cursor(&cursor);
fail:
	if (dip)
		hammer_rel_inode(dip, 0);
	return(error);
}
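
A hedged sketch of iterating every PFS from userland with this ioctl. The name HAMMERIOC_SCAN_PSEUDOFS is assumed from the DragonFly 4.7 API change mentioned in the NOTE, and ids with no PFS record are assumed to surface as ENOENT, per the hammer_ip_lookup() call above.

#include <sys/types.h>
#include <sys/ioctl.h>
#include <vfs/hammer/hammer_ioctl.h>
#include <errno.h>
#include <stdio.h>
#include <strings.h>

static void
pfs_scan_all(int fd)
{
	struct hammer_ioc_pseudofs_rw pfs;
	struct hammer_pseudofs_data pfsd;
	int id;

	for (id = 0; id <= HAMMER_MAX_PFSID; ++id) {
		bzero(&pfs, sizeof(pfs));
		bzero(&pfsd, sizeof(pfsd));
		pfs.pfs_id = id;
		pfs.ondisk = &pfsd;
		pfs.bytes = sizeof(pfsd);
		pfs.version = HAMMER_IOC_PSEUDOFS_VERSION;
		if (ioctl(fd, HAMMERIOC_SCAN_PSEUDOFS, &pfs) < 0) {
			if (errno == ENOENT)
				continue;	/* no PFS with this id */
			break;			/* real error */
		}
		printf("PFS #%d: %s\n", pfs.pfs_id,
		       (pfsd.mirror_flags & HAMMER_PFSD_SLAVE) ?
		       "slave" : "master");
	}
}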
Example #5
/*
 * Set mirroring/pseudo-fs information
 */
int
hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
			struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs)
{
	hammer_pseudofs_inmem_t pfsm;
	uint32_t localization;
	int error;

	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
		return(error);
	localization = pfs_to_lo(pfs->pfs_id);
	if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION)
		error = EINVAL;

	if (error == 0 && pfs->ondisk) {
		/*
		 * Load the PFS so we can modify our in-core copy.  Ignore
		 * ENOENT errors.
		 */
		pfsm = hammer_load_pseudofs(trans, localization, &error);
		error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd));

		/*
		 * Save it back, create a root inode if we are in master
		 * mode and no root exists.
		 *
		 * We do not create root inodes for slaves, the root inode
		 * must be mirrored from the master.
		 */
		if (error == 0 && hammer_is_pfs_master(&pfsm->pfsd)) {
			error = hammer_mkroot_pseudofs(trans, cred, pfsm, ip);
		}
		if (error == 0)
			error = hammer_save_pseudofs(trans, pfsm);

		/*
		 * Wakeup anyone waiting for a TID update for this PFS
		 */
		wakeup(&pfsm->pfsd.sync_end_tid);
		hammer_rel_pseudofs(trans->hmp, pfsm);
	}
	return(error);
}
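
Because this ioctl copies in a complete pfsd, the natural userland pattern is a get/modify/set round trip. A hedged sketch using only fields that appear in this section; pfs_update() is a hypothetical helper, and the GET/SET ioctl names are assumed from the DragonFly headers.

#include <sys/types.h>
#include <sys/ioctl.h>
#include <vfs/hammer/hammer_ioctl.h>
#include <strings.h>

static int
pfs_update(int fd, int pfs_id, hammer_tid_t new_end_tid)
{
	struct hammer_ioc_pseudofs_rw pfs;
	struct hammer_pseudofs_data pfsd;

	bzero(&pfs, sizeof(pfs));
	bzero(&pfsd, sizeof(pfsd));
	pfs.pfs_id = pfs_id;
	pfs.ondisk = &pfsd;
	pfs.bytes = sizeof(pfsd);
	pfs.version = HAMMER_IOC_PSEUDOFS_VERSION;
	if (ioctl(fd, HAMMERIOC_GET_PSEUDOFS, &pfs) < 0)
		return(-1);
	pfsd.sync_end_tid = new_end_tid;	/* modify the in-core copy */
	if (ioctl(fd, HAMMERIOC_SET_PSEUDOFS, &pfs) < 0)
		return(-1);
	return(0);
}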
Example #6
/*
 * Get mirroring/pseudo-fs information
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
 */
int
hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_pseudofs_rw *pfs)
{
	hammer_pseudofs_inmem_t pfsm;
	uint32_t localization;
	int error;

	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
		return(error);
	localization = pfs_to_lo(pfs->pfs_id);
	pfs->bytes = sizeof(struct hammer_pseudofs_data);
	pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;

	pfsm = hammer_load_pseudofs(trans, localization, &error);
	if (error) {
		hammer_rel_pseudofs(trans->hmp, pfsm);
		return(error);
	}

	/*
	 * If the PFS is a master the sync tid is set by normal operation
	 * rather than the mirroring code, and will always track the
	 * real HAMMER filesystem.
	 *
	 * We use flush_tid1, which is the highest fully committed TID.
	 * flush_tid2 is the TID most recently flushed, but the UNDO hasn't
	 * caught up to it yet so a crash will roll us back to flush_tid1.
	 */
	if (hammer_is_pfs_master(&pfsm->pfsd))
		pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1;

	/*
	 * Copy out to userland.
	 */
	if (pfs->ondisk)
		error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd));
	hammer_rel_pseudofs(trans->hmp, pfsm);
	return(error);
}
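
The read side is symmetric: fill in pfs_id, point ondisk at a local pfsd, and the kernel copies the data out. A hedged sketch that prints the role and sync TID of a PFS; pfs_status() is a hypothetical helper.

#include <sys/types.h>
#include <sys/ioctl.h>
#include <vfs/hammer/hammer_ioctl.h>
#include <stdint.h>
#include <stdio.h>
#include <strings.h>

static int
pfs_status(int fd, int pfs_id)
{
	struct hammer_ioc_pseudofs_rw pfs;
	struct hammer_pseudofs_data pfsd;

	bzero(&pfs, sizeof(pfs));
	bzero(&pfsd, sizeof(pfsd));
	pfs.pfs_id = pfs_id;
	pfs.ondisk = &pfsd;
	pfs.bytes = sizeof(pfsd);
	pfs.version = HAMMER_IOC_PSEUDOFS_VERSION;
	if (ioctl(fd, HAMMERIOC_GET_PSEUDOFS, &pfs) < 0)
		return(-1);
	/* for a master, sync_end_tid tracks flush_tid1 per the code above */
	printf("PFS #%d %s sync_end_tid=%016jx\n", pfs_id,
	       (pfsd.mirror_flags & HAMMER_PFSD_SLAVE) ? "slave" : "master",
	       (uintmax_t)pfsd.sync_end_tid);
	return(0);
}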
Example #7
/*
 * Wait for the PFS to sync past the specified TID
 */
int
hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
			 struct hammer_ioc_pseudofs_rw *pfs)
{
	hammer_pseudofs_inmem_t pfsm;
	struct hammer_pseudofs_data pfsd;
	uint32_t localization;
	hammer_tid_t tid;
	void *waitp;
	int error;

	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
		return(error);
	localization = pfs_to_lo(pfs->pfs_id);

	if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0)
		return(error);

	pfsm = hammer_load_pseudofs(trans, localization, &error);
	if (error == 0) {
		if (hammer_is_pfs_slave(&pfsm->pfsd)) {
			tid = pfsm->pfsd.sync_end_tid;
			waitp = &pfsm->pfsd.sync_end_tid;
		} else {
			tid = trans->hmp->flush_tid1;
			waitp = &trans->hmp->flush_tid1;
		}
		if (tid <= pfsd.sync_end_tid)
			error = tsleep(waitp, PCATCH, "hmrmwt", 0);
	}
	hammer_rel_pseudofs(trans->hmp, pfsm);
	if (error == EINTR) {
		pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}
	return(error);
}
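
The only input the kernel reads from the copied-in pfsd is sync_end_tid, so the caller passes the TID it wants the PFS to sync past. Note that a single tsleep() wakeup does not guarantee the target TID was reached, so a careful caller re-checks with the get ioctl and loops. A hedged sketch; pfs_wait_sync() is hypothetical and HAMMERIOC_WAI_PSEUDOFS is assumed from the DragonFly headers.

#include <sys/types.h>
#include <sys/ioctl.h>
#include <vfs/hammer/hammer_ioctl.h>
#include <strings.h>

/* returns 1 if interrupted, 0 after a wakeup, -1 on error */
static int
pfs_wait_sync(int fd, int pfs_id, hammer_tid_t tid)
{
	struct hammer_ioc_pseudofs_rw pfs;
	struct hammer_pseudofs_data pfsd;

	bzero(&pfs, sizeof(pfs));
	bzero(&pfsd, sizeof(pfsd));
	pfsd.sync_end_tid = tid;	/* wait until synced past this TID */
	pfs.pfs_id = pfs_id;
	pfs.ondisk = &pfsd;
	pfs.bytes = sizeof(pfsd);
	pfs.version = HAMMER_IOC_PSEUDOFS_VERSION;
	if (ioctl(fd, HAMMERIOC_WAI_PSEUDOFS, &pfs) < 0)
		return(-1);
	return((pfs.head.flags & HAMMER_IOC_HEAD_INTR) ? 1 : 0);
}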
Example #8
/*
 * All B-Tree records within the specified key range which also conform
 * to the transaction id range are returned.  The mirroring code keeps
 * track of the last transaction id fully scanned and can efficiently
 * pick up where it left off if interrupted.
 *
 * The PFS is identified in the mirror structure.  The passed ip is just
 * some directory in the overall HAMMER filesystem and has nothing to
 * do with the PFS.
 */
int
hammer_ioc_mirror_read(hammer_transaction_t trans, hammer_inode_t ip,
		       struct hammer_ioc_mirror_rw *mirror)
{
	struct hammer_cmirror cmirror;
	struct hammer_cursor cursor;
	union hammer_ioc_mrecord_any mrec;
	hammer_btree_leaf_elm_t elm;
	char *uptr;
	int error;
	int data_len;
	int bytes;
	int eatdisk;
	int mrec_flags;
	uint32_t localization;
	hammer_crc_t rec_crc;

	localization = pfs_to_lo(mirror->pfs_id);

	if ((mirror->key_beg.localization | mirror->key_end.localization) &
	    HAMMER_LOCALIZE_PSEUDOFS_MASK) {
		return(EINVAL);
	}
	if (hammer_btree_cmp(&mirror->key_beg, &mirror->key_end) > 0)
		return(EINVAL);

	mirror->key_cur = mirror->key_beg;
	mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	mirror->key_cur.localization |= localization;
	bzero(&mrec, sizeof(mrec));
	bzero(&cmirror, sizeof(cmirror));

	/*
	 * Make CRC errors non-fatal (at least on data), causing an EDOM
	 * error instead of EIO.
	 */
	trans->flags |= HAMMER_TRANSF_CRCDOM;

retry:
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		goto failed;
	}
	cursor.key_beg = mirror->key_cur;
	cursor.key_end = mirror->key_end;
	cursor.key_end.localization &= HAMMER_LOCALIZE_MASK;
	cursor.key_end.localization |= localization;

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
	cursor.flags |= HAMMER_CURSOR_BACKEND;

	/*
	 * This flag filters the search to only return elements whose create
	 * or delete TID is >= mirror_tid.  The B-Tree uses the mirror_tid
	 * field stored with internal and leaf nodes to shortcut the scan.
	 */
	cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
	cursor.cmirror = &cmirror;
	cmirror.mirror_tid = mirror->tid_beg;

	error = hammer_btree_first(&cursor);
	while (error == 0) {
		/*
		 * Yield to more important tasks
		 */
		error = hammer_signal_check(trans->hmp);
		if (error)
			break;

		/*
		 * An internal node can be returned in mirror-filtered
		 * mode and indicates that the scan is returning a skip
		 * range in the cursor->cmirror structure.
		 */
		uptr = (char *)mirror->ubuf + mirror->count;
		if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL) {
			/*
			 * Check space
			 */
			mirror->key_cur = cmirror.skip_beg;
			bytes = sizeof(mrec.skip);
			if (mirror->count + HAMMER_HEAD_DOALIGN(bytes) >
			    mirror->size) {
				break;
			}

			/*
			 * Fill mrec
			 */
			mrec.head.signature = HAMMER_IOC_MIRROR_SIGNATURE;
			mrec.head.type = HAMMER_MREC_TYPE_SKIP;
			mrec.head.rec_size = bytes;
			mrec.skip.skip_beg = cmirror.skip_beg;
			mrec.skip.skip_end = cmirror.skip_end;
			hammer_crc_set_mrec_head(&mrec.head, bytes);
			error = copyout(&mrec, uptr, bytes);
			eatdisk = 0;
			goto didwrite;
		}

		/*
		 * Leaf node.  In full-history mode we could filter out
		 * elements modified outside the user-requested TID range.
		 *
		 * However, such elements must be returned so the writer
		 * can compare them against the target to determine what
		 * needs to be deleted on the target, particularly for
		 * no-history mirrors.
		 */
		KKASSERT(cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF);
		elm = &cursor.node->ondisk->elms[cursor.index].leaf;
		mirror->key_cur = elm->base;

		/*
		 * If the record was created after our end point we just
		 * ignore it.
		 */
		if (elm->base.create_tid > mirror->tid_end) {
			error = 0;
			bytes = 0;
			eatdisk = 1;
			goto didwrite;
		}

		/*
		 * Determine if we should generate a PASS or a REC.  PASS
		 * records are records without any data payload.  Such
		 * records will be generated if the target is already expected
		 * to have the record, allowing it to delete the gaps.
		 *
		 * A PASS record is also used to perform deletions on the
		 * target.
		 *
		 * Such deletions are needed if the master or files on the
		 * master are no-history, or if the slave is so far behind
		 * that the master has already been pruned.
		 */
		if (elm->base.create_tid < mirror->tid_beg) {
			bytes = sizeof(mrec.rec);
			if (mirror->count + HAMMER_HEAD_DOALIGN(bytes) >
			    mirror->size) {
				break;
			}

			/*
			 * Fill mrec.
			 */
			mrec.head.signature = HAMMER_IOC_MIRROR_SIGNATURE;
			mrec.head.type = HAMMER_MREC_TYPE_PASS;
			mrec.head.rec_size = bytes;
			mrec.rec.leaf = *elm;
			hammer_crc_set_mrec_head(&mrec.head, bytes);
			error = copyout(&mrec, uptr, bytes);
			eatdisk = 1;
			goto didwrite;
		}

		/*
		 * The core code exports the data to userland.
		 *
		 * CRC errors on data are reported but passed through;
		 * the data must be washed by the user program.
		 *
		 * If userland just wants the btree records it can
		 * request that bulk data not be returned.  This is
		 * used during mirror-stream histogram generation.
		 */
		mrec_flags = 0;
		data_len = (elm->data_offset) ? elm->data_len : 0;
		if (data_len &&
		    (mirror->head.flags & HAMMER_IOC_MIRROR_NODATA)) {
			data_len = 0;
			mrec_flags |= HAMMER_MRECF_NODATA;
		}
		if (data_len) {
			error = hammer_btree_extract_data(&cursor);
			if (error) {
				if (error != EDOM)
					break;
				mrec_flags |= HAMMER_MRECF_CRC_ERROR |
					      HAMMER_MRECF_DATA_CRC_BAD;
			}
		}

		bytes = sizeof(mrec.rec) + data_len;
		if (mirror->count + HAMMER_HEAD_DOALIGN(bytes) > mirror->size)
			break;

		/*
		 * Construct the record for userland and copyout.
		 *
		 * The user is asking for a snapshot: if the record was
		 * deleted beyond the user-requested ending tid, the record
		 * is not considered deleted from the point of view of
		 * userland, and delete_tid is cleared.
		 */
		mrec.head.signature = HAMMER_IOC_MIRROR_SIGNATURE;
		mrec.head.type = HAMMER_MREC_TYPE_REC | mrec_flags;
		mrec.head.rec_size = bytes;
		mrec.rec.leaf = *elm;

		if (elm->base.delete_tid > mirror->tid_end)
			mrec.rec.leaf.base.delete_tid = 0;
		rec_crc = hammer_crc_get_mrec_head(&mrec.head, sizeof(mrec.rec));
		if (data_len)
			rec_crc = crc32_ext(cursor.data, data_len, rec_crc);
		mrec.head.rec_crc = rec_crc;
		error = copyout(&mrec, uptr, sizeof(mrec.rec));
		if (data_len && error == 0) {
			error = copyout(cursor.data, uptr + sizeof(mrec.rec),
					data_len);
		}
		eatdisk = 1;

		/*
		 * eatdisk controls whether we skip the current cursor
		 * position on the next scan or not.  If doing a SKIP
		 * the cursor is already positioned properly for the next
		 * scan and eatdisk will be 0.
		 */
didwrite:
		if (error == 0) {
			mirror->count += HAMMER_HEAD_DOALIGN(bytes);
			if (eatdisk)
				cursor.flags |= HAMMER_CURSOR_ATEDISK;
			else
				cursor.flags &= ~HAMMER_CURSOR_ATEDISK;
			error = hammer_btree_iterate(&cursor);
		}
	}
	if (error == ENOENT) {
		mirror->key_cur = mirror->key_end;
		error = 0;
	}
	hammer_done_cursor(&cursor);
	if (error == EDEADLK)
		goto retry;
	if (error == EINTR) {
		mirror->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}
failed:
	mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	return(error);
}
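
On return, mirror->ubuf holds mirror->count bytes of SKIP/PASS/REC records, each padded to the same alignment the kernel used when advancing mirror->count. A hedged sketch of a userland consumer of that stream; process_mrec() is a hypothetical callback.

#include <vfs/hammer/hammer_ioctl.h>

static void
mirror_consume(char *buf, int count,
	       void (*process_mrec)(union hammer_ioc_mrecord_any *))
{
	union hammer_ioc_mrecord_any *mrec;
	int off = 0;

	while (off + (int)sizeof(mrec->head) <= count) {
		mrec = (void *)(buf + off);
		if (mrec->head.signature != HAMMER_IOC_MIRROR_SIGNATURE)
			break;		/* corrupted stream */
		process_mrec(mrec);
		/* records are aligned exactly as the kernel counted them */
		off += HAMMER_HEAD_DOALIGN(mrec->head.rec_size);
	}
}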
Example #9
/*
 * Copy records from userland to the target mirror.
 *
 * The PFS is identified in the mirror structure.  The passed ip is just
 * some directory in the overall HAMMER filesystem and has nothing to
 * do with the PFS.  In fact, there might not even be a root directory for
 * the PFS yet!
 */
int
hammer_ioc_mirror_write(hammer_transaction_t trans, hammer_inode_t ip,
		       struct hammer_ioc_mirror_rw *mirror)
{
	union hammer_ioc_mrecord_any mrec;
	struct hammer_cursor cursor;
	uint32_t localization;
	int checkspace_count = 0;
	int error;
	int bytes;
	char *uptr;
	int seq;

	localization = pfs_to_lo(mirror->pfs_id);
	seq = trans->hmp->flusher.done;

	/*
	 * Validate the mirror structure and relocalize the tracking keys.
	 */
	if (mirror->size < 0 || mirror->size > 0x70000000)
		return(EINVAL);
	mirror->key_beg.localization &= HAMMER_LOCALIZE_MASK;
	mirror->key_beg.localization |= localization;
	mirror->key_end.localization &= HAMMER_LOCALIZE_MASK;
	mirror->key_end.localization |= localization;
	mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	mirror->key_cur.localization |= localization;

	/*
	 * Set up our tracking cursor for the loop.  The tracking cursor
	 * is used to delete records that are no longer present on the
	 * master.  The last handled record at key_cur must be skipped.
	 */
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);

	cursor.key_beg = mirror->key_cur;
	cursor.key_end = mirror->key_end;
	cursor.flags |= HAMMER_CURSOR_BACKEND;
	error = hammer_btree_first(&cursor);
	if (error == 0)
		cursor.flags |= HAMMER_CURSOR_ATEDISK;
	if (error == ENOENT)
		error = 0;

	/*
	 * Loop until our input buffer has been exhausted.
	 */
	while (error == 0 &&
		mirror->count + sizeof(mrec.head) <= mirror->size) {

		/*
		 * Don't blow out the buffer cache.  Leave room for frontend
		 * cache as well.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		while (hammer_flusher_meta_halflimit(trans->hmp) ||
		       hammer_flusher_undo_exhausted(trans, 2)) {
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async_one(trans->hmp);
		}

		/*
		 * If there is insufficient free space it may be due to
		 * reserved big-blocks, which flushing might fix.
		 */
		if (hammer_checkspace(trans->hmp, HAMMER_CHKSPC_MIRROR)) {
			if (++checkspace_count == 10) {
				error = ENOSPC;
				break;
			}
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async(trans->hmp, NULL);
		}


		/*
		 * Acquire and validate header
		 */
		if ((bytes = mirror->size - mirror->count) > sizeof(mrec))
			bytes = sizeof(mrec);
		uptr = (char *)mirror->ubuf + mirror->count;
		error = copyin(uptr, &mrec, bytes);
		if (error)
			break;
		if (mrec.head.signature != HAMMER_IOC_MIRROR_SIGNATURE) {
			error = EINVAL;
			break;
		}
		if (mrec.head.rec_size < sizeof(mrec.head) ||
		    mrec.head.rec_size > sizeof(mrec) + HAMMER_XBUFSIZE ||
		    mirror->count + mrec.head.rec_size > mirror->size) {
			error = EINVAL;
			break;
		}

		switch(mrec.head.type & HAMMER_MRECF_TYPE_MASK) {
		case HAMMER_MREC_TYPE_SKIP:
			if (mrec.head.rec_size != sizeof(mrec.skip))
				error = EINVAL;
			if (error == 0)
				error = hammer_ioc_mirror_write_skip(
						&cursor, &mrec.skip,
						mirror, localization);
			break;
		case HAMMER_MREC_TYPE_REC:
			if (mrec.head.rec_size < sizeof(mrec.rec))
				error = EINVAL;
			if (error == 0)
				error = hammer_ioc_mirror_write_rec(
						&cursor, &mrec.rec,
						mirror, localization,
						uptr + sizeof(mrec.rec));
			break;
		case HAMMER_MREC_TYPE_REC_NODATA:
		case HAMMER_MREC_TYPE_REC_BADCRC:
			/*
			 * Records with bad data payloads are ignored XXX.
			 * Records with no data payload have to be skipped
			 * (they shouldn't have been written in the first
			 * place).
			 */
			if (mrec.head.rec_size < sizeof(mrec.rec))
				error = EINVAL;
			break;
		case HAMMER_MREC_TYPE_PASS:
			if (mrec.head.rec_size != sizeof(mrec.rec))
				error = EINVAL;
			if (error == 0)
				error = hammer_ioc_mirror_write_pass(
						&cursor, &mrec.rec,
						mirror, localization);
			break;
		default:
			error = EINVAL;
			break;
		}

		/*
		 * Retry the current record on deadlock, otherwise setup
		 * for the next loop.
		 */
		if (error == EDEADLK) {
			while (error == EDEADLK) {
				hammer_sync_lock_sh(trans);
				hammer_recover_cursor(&cursor);
				error = hammer_cursor_upgrade(&cursor);
				hammer_sync_unlock(trans);
			}
		} else {
			if (error == EALREADY)
				error = 0;
			if (error == 0) {
				mirror->count +=
					HAMMER_HEAD_DOALIGN(mrec.head.rec_size);
			}
		}
	}
	hammer_done_cursor(&cursor);

	/*
	 * cumulative error
	 */
	if (error) {
		mirror->head.flags |= HAMMER_IOC_HEAD_ERROR;
		mirror->head.error = error;
	}

	/*
	 * Since ioctls don't update the RW data structure if an error is
	 * returned, always return 0.
	 */
	return(0);
}
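
Because the ioctl itself always returns 0, a userland caller must inspect head.flags and head.error, as the cumulative-error block above shows. A hedged sketch of feeding one buffer of mrecs to the kernel; mirror_feed() is hypothetical and HAMMERIOC_MIRROR_WRITE is assumed from the DragonFly headers. The kernel advances key_cur, so streaming multiple buffers is just a loop around this call.

#include <sys/types.h>
#include <sys/ioctl.h>
#include <vfs/hammer/hammer_ioctl.h>

static int
mirror_feed(int fd, struct hammer_ioc_mirror_rw *mirror,
	    char *stream, int stream_bytes)
{
	mirror->ubuf = stream;
	mirror->size = stream_bytes;
	mirror->count = 0;
	if (ioctl(fd, HAMMERIOC_MIRROR_WRITE, mirror) < 0)
		return(-1);
	/* the real status lives in the head, not the ioctl return value */
	if (mirror->head.flags & HAMMER_IOC_HEAD_ERROR)
		return(mirror->head.error);
	return(0);
}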
Example #10
int
hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip,
		   struct hammer_ioc_reblock *reblock)
{
	struct hammer_cursor cursor;
	hammer_btree_elm_t elm;
	int checkspace_count;
	int error;
	int seq;
	int slop;
	uint32_t key_end_localization;

	if ((reblock->key_beg.localization | reblock->key_end.localization) &
	    HAMMER_LOCALIZE_PSEUDOFS_MASK) {
		return(EINVAL);
	}
	if (reblock->key_beg.obj_id >= reblock->key_end.obj_id)
		return(EINVAL);
	if (reblock->free_level < 0 ||
	    reblock->free_level > HAMMER_BIGBLOCK_SIZE)
		return(EINVAL);

	/*
	 * A fill_percentage <= 20% is considered an emergency.  free_level is
	 * inverted from fill_percentage.
	 */
	if (reblock->free_level >= HAMMER_BIGBLOCK_SIZE * 8 / 10)
		slop = HAMMER_CHKSPC_EMERGENCY;
	else
		slop = HAMMER_CHKSPC_REBLOCK;

	/*
	 * The ioctl caller has only set the localization type to reblock.
	 * Initialize the cursor key localization with the ip localization.
	 */
	reblock->key_cur = reblock->key_beg;
	reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	if (reblock->allpfs == 0)
		reblock->key_cur.localization |= ip->obj_localization;

	key_end_localization = reblock->key_end.localization;
	key_end_localization &= HAMMER_LOCALIZE_MASK;
	if (reblock->allpfs == 0)
		key_end_localization |= ip->obj_localization;
	else
		key_end_localization |= pfs_to_lo(HAMMER_MAX_PFSID);

	checkspace_count = 0;
	seq = trans->hmp->flusher.done;
retry:
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		goto failed;
	}
	cursor.key_beg.localization = reblock->key_cur.localization;
	cursor.key_beg.obj_id = reblock->key_cur.obj_id;
	cursor.key_beg.key = HAMMER_MIN_KEY;
	cursor.key_beg.create_tid = 1;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
	cursor.key_beg.obj_type = 0;

	cursor.key_end.localization = key_end_localization;
	cursor.key_end.obj_id = reblock->key_end.obj_id;
	cursor.key_end.key = HAMMER_MAX_KEY;
	cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
	cursor.key_end.delete_tid = 0;
	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
	cursor.key_end.obj_type = 0;

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
	cursor.flags |= HAMMER_CURSOR_BACKEND;
	cursor.flags |= HAMMER_CURSOR_NOSWAPCACHE;

	/*
	 * This flag allows the B-Tree scan code to return internal nodes,
	 * so we can reblock them in addition to the leaves.  Only specify
	 * it if we intend to reblock B-Tree nodes.
	 */
	if (reblock->head.flags & HAMMER_IOC_DO_BTREE)
		cursor.flags |= HAMMER_CURSOR_REBLOCKING;

	error = hammer_btree_first(&cursor);
	while (error == 0) {
		/*
		 * Internal or Leaf node
		 */
		KKASSERT(cursor.index < cursor.node->ondisk->count);
		elm = &cursor.node->ondisk->elms[cursor.index];
		reblock->key_cur.obj_id = elm->base.obj_id;
		reblock->key_cur.localization = elm->base.localization;

		/*
		 * Filesystem went read-only during reblocking
		 */
		if (trans->hmp->ronly) {
			error = EROFS;
			break;
		}

		/*
		 * Yield to more important tasks
		 */
		if ((error = hammer_signal_check(trans->hmp)) != 0)
			break;

		/*
		 * If there is insufficient free space it may be due to
		 * reserved big-blocks, which flushing might fix.
		 *
		 * We must force a retest in case the unlocked cursor is
		 * moved to the end of the leaf, or moved to an internal
		 * node.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		if (hammer_checkspace(trans->hmp, slop)) {
			if (++checkspace_count == 10) {
				error = ENOSPC;
				break;
			}
			hammer_unlock_cursor(&cursor);
			cursor.flags |= HAMMER_CURSOR_RETEST;
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async(trans->hmp, NULL);
			goto skip;
		}

		/*
		 * Acquiring the sync_lock prevents the operation from
		 * crossing a synchronization boundary.
		 *
		 * NOTE: cursor.node may have changed on return.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		hammer_sync_lock_sh(trans);
		error = hammer_reblock_helper(reblock, &cursor, elm);
		hammer_sync_unlock(trans);

		while (hammer_flusher_meta_halflimit(trans->hmp) ||
		       hammer_flusher_undo_exhausted(trans, 2)) {
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async_one(trans->hmp);
		}

		/*
		 * Setup for iteration, our cursor flags may be modified by
		 * other threads while we are unlocked.
		 */
		cursor.flags |= HAMMER_CURSOR_ATEDISK;

		/*
		 * We allocate data buffers, which at the moment we don't
		 * track dirty levels for because we allow the kernel to
		 * write them.  But if we allocate too many we can still
		 * deadlock the buffer cache.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 *	    (The cursor's node and element may change!)
		 */
		if (bd_heatup()) {
			hammer_unlock_cursor(&cursor);
			bwillwrite(HAMMER_XBUFSIZE);
			hammer_lock_cursor(&cursor);
		}
		vm_wait_nominal();
skip:
		if (error == 0) {
			error = hammer_btree_iterate(&cursor);
		}
	}
	if (error == ENOENT)
		error = 0;
	hammer_done_cursor(&cursor);
	if (error == EWOULDBLOCK) {
		hammer_flusher_sync(trans->hmp);
		goto retry;
	}
	if (error == EDEADLK)
		goto retry;
	if (error == EINTR) {
		reblock->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}
failed:
	reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	return(error);
}
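
A hedged sketch of invoking the reblocker from userland. free_level is derived by inverting a fill percentage, matching the emergency-slop comment above, and an interrupted run reports HEAD_INTR and can resume from key_cur. reblock_fill() is hypothetical; HAMMERIOC_REBLOCK, HAMMER_MIN_OBJID and HAMMER_MAX_OBJID are assumed from the DragonFly headers, and with allpfs == 0 the target PFS comes from the descriptor's localization.

#include <sys/types.h>
#include <sys/ioctl.h>
#include <vfs/hammer/hammer_ioctl.h>
#include <strings.h>

static int
reblock_fill(int fd, int fill_percentage)
{
	struct hammer_ioc_reblock reblock;

	bzero(&reblock, sizeof(reblock));
	reblock.key_beg.obj_id = HAMMER_MIN_OBJID;
	reblock.key_end.obj_id = HAMMER_MAX_OBJID;
	/* free_level is inverted from fill_percentage (see above) */
	reblock.free_level = (int)((int64_t)HAMMER_BIGBLOCK_SIZE *
				   (100 - fill_percentage) / 100);
	/*
	 * HAMMER_IOC_DO_BTREE reblocks B-Tree nodes; other DO_* flags
	 * (assumed from the header) select inodes, dirs and data.
	 */
	reblock.head.flags = HAMMER_IOC_DO_BTREE;
	reblock.allpfs = 0;		/* restrict to the fd's PFS */
	if (ioctl(fd, HAMMERIOC_REBLOCK, &reblock) < 0)
		return(-1);
	if (reblock.head.flags & HAMMER_IOC_HEAD_INTR)
		return(1);		/* resume from reblock.key_cur */
	return(0);
}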