Code example #1
File: hammer_mirror.c  Project: Gwenio/DragonFlyBSD
/*
 * Copy records from userland to the target mirror.
 *
 * The PFS is identified in the mirror structure.  The passed ip is just
 * some directory in the overall HAMMER filesystem and has nothing to
 * do with the PFS.  In fact, there might not even be a root directory for
 * the PFS yet!
 */
int
hammer_ioc_mirror_write(hammer_transaction_t trans, hammer_inode_t ip,
		       struct hammer_ioc_mirror_rw *mirror)
{
	union hammer_ioc_mrecord_any mrec;
	struct hammer_cursor cursor;
	u_int32_t localization;
	int checkspace_count = 0;
	int error;
	int bytes;
	char *uptr;
	int seq;

	localization = (u_int32_t)mirror->pfs_id << 16;
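	/*
	 * (Assumed layout: the upper 16 bits of a localization key hold
	 * the PFS id and the lower 16 bits the localization type, so
	 * e.g. pfs_id 3 becomes 0x00030000 here and is recombined with
	 * the type bits kept by HAMMER_LOCALIZE_MASK below.)
	 */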
	seq = trans->hmp->flusher.done;

	/*
	 * Validate the mirror structure and relocalize the tracking keys.
	 */
	if (mirror->size < 0 || mirror->size > 0x70000000)
		return(EINVAL);
	mirror->key_beg.localization &= HAMMER_LOCALIZE_MASK;
	mirror->key_beg.localization += localization;
	mirror->key_end.localization &= HAMMER_LOCALIZE_MASK;
	mirror->key_end.localization += localization;
	mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	mirror->key_cur.localization += localization;

	/*
	 * Set up our tracking cursor for the loop.  The tracking cursor
	 * is used to delete records that are no longer present on the
	 * master.  The last handled record at key_cur must be skipped.
	 */
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);

	cursor.key_beg = mirror->key_cur;
	cursor.key_end = mirror->key_end;
	cursor.flags |= HAMMER_CURSOR_BACKEND;
	error = hammer_btree_first(&cursor);
	if (error == 0)
		cursor.flags |= HAMMER_CURSOR_ATEDISK;
	if (error == ENOENT)
		error = 0;
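	/*
	 * (ENOENT here only means the target has no records in the key
	 * range yet; as noted above, the PFS may still be empty.)
	 */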

	/*
	 * Loop until our input buffer has been exhausted.
	 */
	while (error == 0 &&
		mirror->count + sizeof(mrec.head) <= mirror->size) {

	        /*
		 * Don't blow out the buffer cache.  Leave room for frontend
		 * cache as well.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
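		/*
		 * (Each pass waits for the flush cycle recorded in 'seq'
		 * to complete, then kicks off one more and re-checks the
		 * limits before proceeding.)
		 */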
		while (hammer_flusher_meta_halflimit(trans->hmp) ||
		       hammer_flusher_undo_exhausted(trans, 2)) {
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async_one(trans->hmp);
		}

		/*
		 * If there is insufficient free space it may be due to
		 * reserved bigblocks, which flushing might fix.
		 */
		if (hammer_checkspace(trans->hmp, HAMMER_CHKSPC_MIRROR)) {
			if (++checkspace_count == 10) {
				error = ENOSPC;
				break;
			}
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async(trans->hmp, NULL);
		}

		/*
		 * Acquire and validate header
		 */
		if ((bytes = mirror->size - mirror->count) > sizeof(mrec))
			bytes = sizeof(mrec);
		uptr = (char *)mirror->ubuf + mirror->count;
		error = copyin(uptr, &mrec, bytes);
		if (error)
			break;
		if (mrec.head.signature != HAMMER_IOC_MIRROR_SIGNATURE) {
			error = EINVAL;
			break;
		}
		if (mrec.head.rec_size < sizeof(mrec.head) ||
		    mrec.head.rec_size > sizeof(mrec) + HAMMER_XBUFSIZE ||
		    mirror->count + mrec.head.rec_size > mirror->size) {
			error = EINVAL;
			break;
		}

		switch(mrec.head.type & HAMMER_MRECF_TYPE_MASK) {
		case HAMMER_MREC_TYPE_SKIP:
			if (mrec.head.rec_size != sizeof(mrec.skip))
				error = EINVAL;
			if (error == 0)
				error = hammer_ioc_mirror_write_skip(&cursor,
						&mrec.skip, mirror,
						localization);
			break;
		case HAMMER_MREC_TYPE_REC:
			if (mrec.head.rec_size < sizeof(mrec.rec))
				error = EINVAL;
			if (error == 0)
				error = hammer_ioc_mirror_write_rec(&cursor,
						&mrec.rec, mirror,
						localization,
						uptr + sizeof(mrec.rec));
			break;
		case HAMMER_MREC_TYPE_REC_NODATA:
		case HAMMER_MREC_TYPE_REC_BADCRC:
			/*
			 * Records with bad data payloads are ignored XXX.
			 * Records with no data payload have to be skipped
			 * (they shouldn't have been written in the first
			 * place).
			 */
			if (mrec.head.rec_size < sizeof(mrec.rec))
				error = EINVAL;
			break;
		case HAMMER_MREC_TYPE_PASS:
			if (mrec.head.rec_size != sizeof(mrec.rec))
				error = EINVAL;
			if (error == 0)
				error = hammer_ioc_mirror_write_pass(&cursor,
						&mrec.rec, mirror,
						localization);
			break;
		default:
			error = EINVAL;
			break;
		}

		/*
		 * Retry the current record on deadlock, otherwise setup
		 * for the next loop.
		 */
		if (error == EDEADLK) {
			while (error == EDEADLK) {
				hammer_sync_lock_sh(trans);
				hammer_recover_cursor(&cursor);
				error = hammer_cursor_upgrade(&cursor);
				hammer_sync_unlock(trans);
			}
		} else {
			if (error == EALREADY)
				error = 0;
			if (error == 0) {
				mirror->count += 
					HAMMER_HEAD_DOALIGN(mrec.head.rec_size);
			}
		}
	}
	hammer_done_cursor(&cursor);

	/*
	 * Report any cumulative error via the ioctl header.
	 */
	if (error) {
		mirror->head.flags |= HAMMER_IOC_HEAD_ERROR;
		mirror->head.error = error;
	}

	/*
	 * Ioctls don't update the RW data structure if an error is returned,
	 * so always return 0; the real error is reported via
	 * mirror->head.error above.
	 */
	return(0);
}
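The loop above consumes a packed stream of variable-length mrecords, each advanced by HAMMER_HEAD_DOALIGN(rec_size) rather than the raw rec_size. A minimal self-contained sketch of that framing, using hypothetical stand-in types and an assumed 16-byte alignment in place of the real HAMMER headers:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical stand-ins; the real definitions live in the HAMMER headers. */
#define MREC_ALIGN(n)	(((n) + 15) & ~(uint32_t)15)	/* assumed alignment */
#define MREC_SIGNATURE	0x4d524543u			/* placeholder value */

struct mrec_head {
	uint32_t signature;		/* sanity check */
	uint32_t type;			/* record type */
	uint32_t rec_size;		/* header + payload, unaligned */
};

/*
 * Walk the stream the way hammer_ioc_mirror_write() does: make sure a
 * full header fits, validate rec_size against the remaining buffer,
 * then advance by the aligned record size.
 */
static int
scan_stream(const char *buf, uint32_t size)
{
	uint32_t count = 0;
	struct mrec_head head;

	while (count + sizeof(head) <= size) {
		memcpy(&head, buf + count, sizeof(head));
		if (head.signature != MREC_SIGNATURE)
			return (-1);
		if (head.rec_size < sizeof(head) ||
		    count + head.rec_size > size)
			return (-1);
		printf("record type %u, %u bytes\n", head.type, head.rec_size);
		count += MREC_ALIGN(head.rec_size);
	}
	return (0);
}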
Code example #2
/*
 * Reblock the B-Tree (leaf) node, record, and/or data if necessary.
 *
 * XXX We have no visibility into internal B-Tree nodes at the moment,
 * only leaf nodes.
 */
static int
hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
		      hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	hammer_mount_t hmp;
	hammer_off_t tmp_offset;
	hammer_node_ondisk_t ondisk;
	struct hammer_btree_leaf_elm leaf;
	int error;
	int bytes;
	int cur;
	int iocflags;

	error = 0;
	hmp = cursor->trans->hmp;

	/*
	 * Reblock data.  Note that data embedded in a record is reblocked
	 * by the record reblock code.  Data processing only occurs at leaf
	 * nodes and for RECORD element types.
	 */
	if (cursor->node->ondisk->type != HAMMER_BTREE_TYPE_LEAF)
		goto skip;
	if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD)
		return(0);
	tmp_offset = elm->leaf.data_offset;
	if (tmp_offset == 0)
		goto skip;
	if (error)
		goto skip;

	/*
	 * NOTE: Localization restrictions may also have been set up, so we
	 *	 can't just set the match flags willy-nilly here.
	 */
	switch (elm->leaf.base.rec_type) {
	case HAMMER_RECTYPE_INODE:
	case HAMMER_RECTYPE_SNAPSHOT:
	case HAMMER_RECTYPE_CONFIG:
		iocflags = HAMMER_IOC_DO_INODES;
		break;
	case HAMMER_RECTYPE_EXT:
	case HAMMER_RECTYPE_FIX:
	case HAMMER_RECTYPE_PFS:
	case HAMMER_RECTYPE_DIRENTRY:
		iocflags = HAMMER_IOC_DO_DIRS;
		break;
	case HAMMER_RECTYPE_DATA:
	case HAMMER_RECTYPE_DB:
		iocflags = HAMMER_IOC_DO_DATA;
		break;
	default:
		iocflags = 0;
		break;
	}
	if (reblock->head.flags & iocflags) {
		++reblock->data_count;
		reblock->data_byte_count += elm->leaf.data_len;
		bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
		if (hammer_debug_general & 0x4000)
			kprintf("D %6d/%d\n", bytes, reblock->free_level);
		if (error == 0 && (cur == 0 || reblock->free_level == 0) &&
		    bytes >= reblock->free_level) {
			/*
			 * This is nasty, the uncache code may have to get
			 * vnode locks and because of that we can't hold
			 * the cursor locked.
			 *
			 * WARNING: See warnings in hammer_unlock_cursor()
			 *	    function.
			 */
			leaf = elm->leaf;
			hammer_unlock_cursor(cursor);
			hammer_io_direct_uncache(hmp, &leaf);
			hammer_lock_cursor(cursor);

			/*
			 * elm may have become stale or invalid; reload it.
			 * The ondisk variable is temporary only.  Note that
			 * cursor->node, and thus cursor->node->ondisk, may
			 * also have changed.
			 */
			ondisk = cursor->node->ondisk;
			elm = &ondisk->elms[cursor->index];
			if (cursor->flags & HAMMER_CURSOR_RETEST) {
				kprintf("hammer: debug: retest on "
					"reblocker uncache\n");
				error = EDEADLK;
			} else if (ondisk->type != HAMMER_BTREE_TYPE_LEAF ||
				   cursor->index >= ondisk->count) {
				kprintf("hammer: debug: shifted on "
					"reblocker uncache\n");
				error = EDEADLK;
			} else if (bcmp(&elm->leaf, &leaf, sizeof(leaf))) {
				kprintf("hammer: debug: changed on "
					"reblocker uncache\n");
				error = EDEADLK;
			}
			if (error == 0)
				error = hammer_cursor_upgrade(cursor);
			if (error == 0) {
				KKASSERT(cursor->index < ondisk->count);
				error = hammer_reblock_data(reblock,
							    cursor, elm);
			}
			if (error == 0) {
				++reblock->data_moves;
				reblock->data_byte_moves += elm->leaf.data_len;
			}
		}
	}

skip:
	/*
	 * Reblock a B-Tree internal or leaf node.  A leaf node is reblocked
	 * on initial entry only (element 0).  An internal node is reblocked
	 * when entered upward from its first leaf node only (also element 0).
	 * Further revisits of the internal node (index > 0) are ignored.
	 */
	tmp_offset = cursor->node->node_offset;
	if (cursor->index == 0 &&
	    error == 0 && (reblock->head.flags & HAMMER_IOC_DO_BTREE)) {
		++reblock->btree_count;
		bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
		if (hammer_debug_general & 0x4000)
			kprintf("B %6d/%d\n", bytes, reblock->free_level);
		if (error == 0 && (cur == 0 || reblock->free_level == 0) &&
		    bytes >= reblock->free_level) {
			error = hammer_cursor_upgrade(cursor);
			if (error == 0) {
				if (cursor->parent) {
					KKASSERT(cursor->parent_index <
						 cursor->parent->ondisk->count);
					elm = &cursor->parent->ondisk->elms[cursor->parent_index];
				} else {
					elm = NULL;
				}
				switch(cursor->node->ondisk->type) {
				case HAMMER_BTREE_TYPE_LEAF:
					error = hammer_reblock_leaf_node(
							reblock, cursor, elm);
					break;
				case HAMMER_BTREE_TYPE_INTERNAL:
					error = hammer_reblock_int_node(
							reblock, cursor, elm);
					break;
				default:
					panic("Illegal B-Tree node type");
				}
			}
			if (error == 0) {
				++reblock->btree_moves;
			}
		}
	}

	hammer_cursor_downgrade(cursor);
	return(error);
}
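The snapshot/unlock/relock/revalidate dance around hammer_io_direct_uncache() above is the general pattern for performing a blocking operation while a cursor is held. A condensed, self-contained sketch of that shape, with stand-in types and a hypothetical blocking_work() in place of the real HAMMER calls:

#include <errno.h>
#include <string.h>

/* Stand-in types; the real ones are hammer_cursor/hammer_btree_elm. */
struct elem { long key; int data_len; };
struct cursor {
	struct elem *elms;	/* element array of the current node */
	int count;		/* number of live elements in the node */
	int index;		/* element the cursor points at */
	int retest;		/* node changed while we were unlocked */
};

static void lock_cursor(struct cursor *c)    { (void)c; }
static void unlock_cursor(struct cursor *c)  { (void)c; }
static void blocking_work(struct elem *e)    { (void)e; /* e.g. uncache */ }

/*
 * Snapshot the element, drop the cursor lock for the blocking call,
 * then revalidate after relocking.  Any evidence that the B-Tree
 * shifted underneath us becomes EDEADLK so the caller retries from a
 * known-good state instead of operating on a stale element.
 */
static int
blocking_op_under_cursor(struct cursor *c)
{
	struct elem saved = c->elms[c->index];

	unlock_cursor(c);
	blocking_work(&saved);
	lock_cursor(c);

	if (c->retest || c->index >= c->count ||
	    memcmp(&c->elms[c->index], &saved, sizeof(saved)) != 0)
		return (EDEADLK);
	return (0);
}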
Code example #3
/*
 * NOTE: THIS CODE HAS BEEN REMOVED!  Pruning no longer attempts to realign
 *	 adjacent records because it seriously interferes with every 
 *	 mirroring algorithm I could come up with.
 *
 *	 This means that historical accesses beyond the first snapshot
 *	 softlink should be on snapshot boundaries only.  Historical
 *	 accesses from "now" to the first snapshot softlink continue to
 *	 be fine-grained.
 *
 * NOTE: It also looks like there's a bug in the removed code.  It is believed
 *	 that create_tid can sometimes get set to 0xffffffffffffffff.  Just as
 *	 well we no longer try to do this fancy shit.  Probably the attempt to
 *	 correct the rhb is blowing up the cursor's indexing or addressing mapping.
 *
 * Align the record to cover any gaps created through the deletion of
 * records within the pruning space.  If we were to just delete the records
 * there would be gaps which in turn would cause a snapshot that is NOT on
 * a pruning boundary to appear corrupt to the user.  Forcing alignment
 * of the create_tid and delete_tid for retained records 'reconnects'
 * the previously contiguous space, making it contiguous again after the
 * deletions.
 *
 * The use of a reverse iteration allows us to safely align the records and
 * related elements without creating temporary overlaps.  XXX we should
 * add ordering dependencies for record buffers to guarantee consistency
 * during recovery.
 */
static int
realign_prune(struct hammer_ioc_prune *prune,
	      hammer_cursor_t cursor, int realign_cre, int realign_del)
{
	struct hammer_ioc_prune_elm *scan;
	hammer_btree_elm_t elm;
	hammer_tid_t delta;
	hammer_tid_t tid;
	int error;

	hammer_cursor_downgrade(cursor);

	elm = &cursor->node->ondisk->elms[cursor->index];
	++prune->stat_realignments;

	/*
	 * Align the create_tid.  By doing a reverse iteration we guarantee
	 * that all records after our current record have already been
	 * aligned, allowing us to safely correct the right-hand-boundary
 * (because no record to our right will otherwise be left with a
 * create_tid to the left of our aligned create_tid).
	 */
	error = 0;
	if (realign_cre >= 0) {
		scan = &prune->elms[realign_cre];

		delta = (elm->leaf.base.create_tid - scan->beg_tid) % 
			scan->mod_tid;
		if (delta) {
			tid = elm->leaf.base.create_tid - delta + scan->mod_tid;

			/* can EDEADLK */
			error = hammer_btree_correct_rhb(cursor, tid + 1);
			if (error == 0) {
				error = hammer_btree_extract(cursor,
						     HAMMER_CURSOR_GET_LEAF);
			}
			if (error == 0) {
				/* can EDEADLK */
				error = hammer_cursor_upgrade(cursor);
			}
			if (error == 0) {
				hammer_modify_node(cursor->trans, cursor->node,
					    &elm->leaf.base.create_tid,
					    sizeof(elm->leaf.base.create_tid));
				elm->leaf.base.create_tid = tid;
				hammer_modify_node_done(cursor->node);
			}
		}
	}

	/*
	 * Align the delete_tid.  This only occurs if the record is historical
	 * and was deleted at some point.  Realigning the delete_tid does not
	 * move the record within the B-Tree but may cause it to temporarily
	 * overlap a record that has not yet been pruned.
	 */
	if (error == 0 && realign_del >= 0) {
		scan = &prune->elms[realign_del];

		delta = (elm->leaf.base.delete_tid - scan->beg_tid) % 
			scan->mod_tid;
		if (delta) {
			error = hammer_btree_extract(cursor,
						     HAMMER_CURSOR_GET_LEAF);
			if (error == 0) {
				hammer_modify_node(cursor->trans, cursor->node,
					    &elm->leaf.base.delete_tid,
					    sizeof(elm->leaf.base.delete_tid));
				elm->leaf.base.delete_tid =
					    elm->leaf.base.delete_tid -
					    delta + scan->mod_tid;
				hammer_modify_node_done(cursor->node);
			}
		}
	}
	return (error);
}
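Both branches above use the same arithmetic: delta = (tid - beg_tid) % mod_tid, and a nonzero delta is rounded up to the next boundary with tid - delta + mod_tid. A self-contained restatement with worked values (the helper is hypothetical, not part of HAMMER):

#include <assert.h>
#include <stdint.h>

typedef uint64_t hammer_tid_t;

/* Round tid up to the next mod_tid boundary relative to beg_tid. */
static hammer_tid_t
prune_align_tid(hammer_tid_t tid, hammer_tid_t beg_tid, hammer_tid_t mod_tid)
{
	hammer_tid_t delta = (tid - beg_tid) % mod_tid;

	return (delta ? tid - delta + mod_tid : tid);
}

int
main(void)
{
	/* beg_tid=100, mod_tid=50: 130 aligns up to 150, 150 is kept. */
	assert(prune_align_tid(130, 100, 50) == 150);
	assert(prune_align_tid(150, 100, 50) == 150);
	return (0);
}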
Code example #4
File: hammer_reblock.c  Project: bradla/hammer-linux
/*
 * Reblock the B-Tree (leaf) node, record, and/or data if necessary.
 *
 * XXX We have no visibility into internal B-Tree nodes at the moment,
 * only leaf nodes.
 */
static int
hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
		      hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	hammer_mount_t hmp;
	hammer_off_t tmp_offset;
	struct hammer_btree_leaf_elm leaf;
	int error;
	int bytes;
	int cur;
	int iocflags;

	error = 0;
	hmp = cursor->trans->hmp;

	/*
	 * Reblock data.  Note that data embedded in a record is reblocked
	 * by the record reblock code.  Data processing only occurs at leaf
	 * nodes and for RECORD element types.
	 */
	if (cursor->node->ondisk->type != HAMMER_BTREE_TYPE_LEAF)
		goto skip;
	if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD)
		return(0);
	tmp_offset = elm->leaf.data_offset;
	if (tmp_offset == 0)
		goto skip;
	if (error)
		goto skip;

	/*
	 * NOTE: Localization restrictions may also have been set up, so we
	 * can't just set the match flags willy-nilly here.
	 */
	switch(elm->leaf.base.rec_type) {
	case HAMMER_RECTYPE_INODE:
		iocflags = HAMMER_IOC_DO_INODES;
		break;
	case HAMMER_RECTYPE_EXT:
	case HAMMER_RECTYPE_FIX:
	case HAMMER_RECTYPE_PFS:
	case HAMMER_RECTYPE_DIRENTRY:
		iocflags = HAMMER_IOC_DO_DIRS;
		break;
	case HAMMER_RECTYPE_DATA:
	case HAMMER_RECTYPE_DB:
		iocflags = HAMMER_IOC_DO_DATA;
		break;
	default:
		iocflags = 0;
		break;
	}
	if (reblock->head.flags & iocflags) {
		++reblock->data_count;
		reblock->data_byte_count += elm->leaf.data_len;
		bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
		if (hammer_debug_general & 0x4000)
			kprintf("D %6d/%d\n", bytes, reblock->free_level);
		if (error == 0 && (cur == 0 || reblock->free_level == 0) &&
		    bytes >= reblock->free_level) {
			/*
			 * This is nasty, the uncache code may have to get
			 * vnode locks and because of that we can't hold
			 * the cursor locked.
			 */
			leaf = elm->leaf;
			hammer_unlock_cursor(cursor, 0);
			hammer_io_direct_uncache(hmp, &leaf);
			hammer_lock_cursor(cursor, 0);
			if (cursor->flags & HAMMER_CURSOR_RETEST) {
				kprintf("hammer: retest after uncache\n");
				error = EDEADLK;
			} else {
				KKASSERT(bcmp(&elm->leaf, &leaf, sizeof(leaf)) == 0);
			}
			if (error == 0)
				error = hammer_cursor_upgrade(cursor);
			if (error == 0) {
				error = hammer_reblock_data(reblock,
							    cursor, elm);
			}
			if (error == 0) {
				++reblock->data_moves;
				reblock->data_byte_moves += elm->leaf.data_len;
			}
		}
	}

skip:
	/*
	 * Reblock a B-Tree internal or leaf node.
	 */
	tmp_offset = cursor->node->node_offset;
	if (cursor->index == 0 &&
	    error == 0 && (reblock->head.flags & HAMMER_IOC_DO_BTREE)) {
		++reblock->btree_count;
		bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
		if (hammer_debug_general & 0x4000)
			kprintf("B %6d/%d\n", bytes, reblock->free_level);
		if (error == 0 && (cur == 0 || reblock->free_level == 0) &&
		    bytes >= reblock->free_level) {
			error = hammer_cursor_upgrade(cursor);
			if (error == 0) {
				if (cursor->parent)
					elm = &cursor->parent->ondisk->elms[cursor->parent_index];
				else
					elm = NULL;
				switch(cursor->node->ondisk->type) {
				case HAMMER_BTREE_TYPE_LEAF:
					error = hammer_reblock_leaf_node(
							reblock, cursor, elm);
					break;
				case HAMMER_BTREE_TYPE_INTERNAL:
					error = hammer_reblock_int_node(
							reblock, cursor, elm);
					break;
				default:
					panic("Illegal B-Tree node type");
				}
			}
			if (error == 0) {
				++reblock->btree_moves;
			}
		}
	}

	hammer_cursor_downgrade(cursor);
	return(error);
}
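Both versions of the helper gate each move on the same predicate: error == 0 && (cur == 0 || reblock->free_level == 0) && bytes >= reblock->free_level, where cur appears to flag the blockmap's current allocation block. A standalone restatement of that test (hypothetical helper, for illustration only):

/*
 * Nonzero when a big-block is a reblocking candidate: the blockmap
 * lookup succeeded, the block is not the current allocation target
 * (unless free_level is 0, meaning "reblock everything"), and at
 * least free_level bytes are already free in it.
 */
static int
reblock_candidate(int error, int cur, int bytes, int free_level)
{
	if (error)
		return (0);
	if (cur != 0 && free_level != 0)
		return (0);
	return (bytes >= free_level);
}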