Ejemplo n.º 1
0
/*
 * Reblock a record's data.
 *
 * The record's data is copied into a newly allocated data area, the old
 * area is handed back to the blockmap, and the B-Tree element's
 * data_offset is rewritten to reference the new copy.
 *
 * reblock - the reblock ioctl request (not referenced by this function)
 * cursor  - supplies the transaction, the node that is modified when
 *	     elm's data_offset is rewritten, and the extracted record data
 * elm     - B-Tree element whose leaf.data_offset is updated; it is
 *	     dereferenced unconditionally and therefore must not be NULL
 *
 * Returns 0 on success, otherwise the error produced by
 * hammer_btree_extract_data() or hammer_alloc_data().
 */
static int
hammer_reblock_data(struct hammer_ioc_reblock *reblock,
		    hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	hammer_buffer_t data_buffer = NULL;
	hammer_off_t odata_offset;
	hammer_off_t ndata_offset;
	int error;
	void *ndata;

	error = hammer_btree_extract_data(cursor);
	if (error)
		return (error);
	ndata = hammer_alloc_data(cursor->trans, elm->leaf.data_len,
				  elm->leaf.base.rec_type,
				  &ndata_offset, &data_buffer,
				  0, &error);
	if (error)
		goto done;
	hammer_io_notmeta(data_buffer);

	/*
	 * Move the data.  Note that we must invalidate any cached
	 * data buffer in the cursor before calling blockmap_free.
	 * The blockmap_free may free up the entire big-block and
	 * will not be able to invalidate it if the cursor is holding
	 * a data buffer cached in that big-block.
	 */
	hammer_modify_buffer_noundo(cursor->trans, data_buffer);
	bcopy(cursor->data, ndata, elm->leaf.data_len);
	hammer_modify_buffer_done(data_buffer);
	hammer_cursor_invalidate_cache(cursor);

	hammer_blockmap_free(cursor->trans,
			     elm->leaf.data_offset, elm->leaf.data_len);

	/*
	 * Repoint the element at the new copy, remembering the old offset
	 * for the debug output below.
	 */
	hammer_modify_node(cursor->trans, cursor->node,
			   &elm->leaf.data_offset, sizeof(hammer_off_t));
	odata_offset = elm->leaf.data_offset;
	elm->leaf.data_offset = ndata_offset;
	hammer_modify_node_done(cursor->node);

	/*
	 * elm has already been dereferenced above, so it cannot be NULL
	 * here; print its localization directly.
	 */
	if (hammer_debug_general & 0x4000) {
		hdkprintf("%08x %016jx -> %016jx\n",
			elm->base.localization,
			(intmax_t)odata_offset,
			(intmax_t)ndata_offset);
	}
done:
	if (data_buffer)
		hammer_rel_buffer(data_buffer, 0);
	return (error);
}
Ejemplo n.º 2
0
/*
 * Copy onode's on-disk image into the newly allocated nnode and redirect
 * the references to the node so they point at nnode.
 *
 * If elm is non-NULL it must be an internal-node element (asserted); its
 * subtree_offset, modified under cursor->parent, is set to nnode's
 * offset.  If elm is NULL the node is the B-Tree root and the root
 * volume's vol0_btree_root is set instead.
 *
 * When the copied node is an internal node, every element in it is then
 * passed to btree_set_parent_of_child(); a failure there panics.
 */
static void
hammer_move_node(hammer_cursor_t cursor, hammer_btree_elm_t elm,
		 hammer_node_t onode, hammer_node_t nnode)
{
	hammer_volume_t root_vol;
	int error;
	int idx;

	bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk));

	/*
	 * Fix the reference from above first.
	 */
	if (elm == NULL) {
		/*
		 * Root of the B-Tree: the reference lives in the root
		 * volume header.
		 */
		root_vol = hammer_get_root_volume(cursor->trans->hmp, &error);
		KKASSERT(error == 0);

		hammer_modify_volume_field(cursor->trans, root_vol,
					   vol0_btree_root);
		root_vol->ondisk->vol0_btree_root = nnode->node_offset;
		hammer_modify_volume_done(root_vol);
		hammer_rel_volume(root_vol, 0);
	} else {
		/*
		 * Not the root: the reference is the parent's element.
		 */
		KKASSERT(hammer_is_internal_node_elm(elm));
		hammer_modify_node(cursor->trans, cursor->parent,
				   &elm->internal.subtree_offset,
				   sizeof(elm->internal.subtree_offset));
		elm->internal.subtree_offset = nnode->node_offset;
		hammer_modify_node_done(cursor->parent);
	}

	/*
	 * Only internal nodes have children whose parent pointers need
	 * to be redirected.
	 */
	if (nnode->ondisk->type != HAMMER_BTREE_TYPE_INTERNAL)
		return;

	for (idx = 0; idx < nnode->ondisk->count; ++idx) {
		error = btree_set_parent_of_child(cursor->trans, nnode,
				&nnode->ondisk->elms[idx]);
		if (error)
			hpanic("reblock internal node: fixup problem");
	}
}
Ejemplo n.º 3
0
/*
 * Relocate the data belonging to a record.
 *
 * A new data area is allocated, the record data held by the cursor is
 * copied into it, the old area is released to the blockmap and the
 * B-Tree element's data_offset is rewritten to the new location.
 *
 * Returns 0 on success, otherwise the error from hammer_btree_extract()
 * or hammer_alloc_data().  The reblock argument is not referenced.
 */
static int
hammer_reblock_data(struct hammer_ioc_reblock *reblock,
		    hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	struct hammer_buffer *data_buffer = NULL;
	hammer_off_t ndata_offset;
	void *ndata;
	int error;

	error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA |
					     HAMMER_CURSOR_GET_LEAF);
	if (error)
		return (error);

	ndata = hammer_alloc_data(cursor->trans, elm->leaf.data_len,
				  elm->leaf.base.rec_type,
				  &ndata_offset, &data_buffer,
				  0, &error);
	if (error == 0) {
		hammer_io_notmeta(data_buffer);

		/*
		 * Copy the data into its new home.  The cursor's cached
		 * data buffer has to be dropped before blockmap_free is
		 * called: freeing may release the whole large-block, and
		 * that cannot be invalidated while the cursor still
		 * caches a data buffer inside it.
		 */
		hammer_modify_buffer(cursor->trans, data_buffer, NULL, 0);
		bcopy(cursor->data, ndata, elm->leaf.data_len);
		hammer_modify_buffer_done(data_buffer);
		hammer_cursor_invalidate_cache(cursor);

		hammer_blockmap_free(cursor->trans,
				     elm->leaf.data_offset,
				     elm->leaf.data_len);

		/*
		 * Point the element at the relocated data.
		 */
		hammer_modify_node(cursor->trans, cursor->node,
				   &elm->leaf.data_offset,
				   sizeof(hammer_off_t));
		elm->leaf.data_offset = ndata_offset;
		hammer_modify_node_done(cursor->node);
	}

	if (data_buffer)
		hammer_rel_buffer(data_buffer, 0);
	return (error);
}
Ejemplo n.º 4
0
/*
 * Move a record's data to a newly allocated location.
 *
 * The data extracted through the cursor is copied into the new
 * allocation, the previous allocation is freed in the blockmap, and the
 * B-Tree element's data_offset is updated to the new offset.
 *
 * Returns 0 on success, otherwise the error from hammer_btree_extract()
 * or hammer_alloc_data().  The reblock argument is not referenced.
 */
static int
hammer_reblock_data(struct hammer_ioc_reblock *reblock,
		    hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	struct hammer_buffer *new_buffer = NULL;
	hammer_off_t new_offset;
	void *new_data;
	int error;

	error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA |
					     HAMMER_CURSOR_GET_LEAF);
	if (error)
		return (error);

	new_data = hammer_alloc_data(cursor->trans, elm->leaf.data_len,
				     elm->leaf.base.rec_type,
				     &new_offset, &new_buffer, &error);
	if (error)
		goto done;

	/*
	 * Copy the record data into the new allocation.
	 */
	hammer_modify_buffer(cursor->trans, new_buffer, NULL, 0);
	bcopy(cursor->data, new_data, elm->leaf.data_len);
	hammer_modify_buffer_done(new_buffer);

	/*
	 * Release the old allocation, then repoint the element.
	 */
	hammer_blockmap_free(cursor->trans,
			     elm->leaf.data_offset, elm->leaf.data_len);

	hammer_modify_node(cursor->trans, cursor->node,
			   &elm->leaf.data_offset, sizeof(hammer_off_t));
	elm->leaf.data_offset = new_offset;
	hammer_modify_node_done(cursor->node);

done:
	if (new_buffer)
		hammer_rel_buffer(new_buffer, 0);
	return (error);
}
Ejemplo n.º 5
0
/*
 * NOTE: THIS CODE HAS BEEN REMOVED!  Pruning no longer attempts to realign
 *	 adjacent records because it seriously interferes with every
 *	 mirroring algorithm the author could come up with.
 *
 *	 This means that historical accesses beyond the first snapshot
 *	 softlink should be on snapshot boundaries only.  Historical
 *	 accesses from "now" to the first snapshot softlink continue to
 *	 be fine-grained.
 *
 * NOTE: It also looks like there is a bug in the removed code.  It is
 *	 believed that create_tid can sometimes get set to
 *	 0xffffffffffffffff, so it is just as well that this realignment
 *	 is no longer attempted.  Probably the attempt to correct the
 *	 right-hand boundary (rhb) is blowing up the cursor's indexing or
 *	 addressing mapping.
 *
 * Align the record to cover any gaps created through the deletion of
 * records within the pruning space.  If we were to just delete the records
 * there would be gaps which in turn would cause a snapshot that is NOT on
 * a pruning boundary to appear corrupt to the user.  Forcing alignment
 * of the create_tid and delete_tid for retained records 'reconnects'
 * the previously contiguous space, making it contiguous again after the
 * deletions.
 *
 * The use of a reverse iteration allows us to safely align the records and
 * related elements without creating temporary overlaps.  XXX we should
 * add ordering dependencies for record buffers to guarantee consistency
 * during recovery.
 *
 * prune       - prune request; elms[] supplies beg_tid/mod_tid for the
 *		 alignment and stat_realignments is incremented
 * cursor      - cursor whose node/index select the element to realign
 * realign_cre - index into prune->elms[] used to align create_tid,
 *		 or a negative value to skip create_tid alignment
 * realign_del - index into prune->elms[] used to align delete_tid,
 *		 or a negative value to skip delete_tid alignment
 *
 * Returns 0 on success or the first error encountered (the inline
 * comments note that EDEADLK is possible).
 */
static int
realign_prune(struct hammer_ioc_prune *prune,
	      hammer_cursor_t cursor, int realign_cre, int realign_del)
{
	struct hammer_ioc_prune_elm *scan;
	hammer_btree_elm_t elm;
	hammer_tid_t delta;
	hammer_tid_t tid;
	int error;

	hammer_cursor_downgrade(cursor);

	/*
	 * NOTE(review): elm is captured once here and reused after
	 * hammer_btree_correct_rhb() and hammer_cursor_upgrade().  If
	 * either call can change cursor->node or cursor->index, elm would
	 * be stale -- possibly related to the bug described in the header
	 * comment.  Confirm.
	 */
	elm = &cursor->node->ondisk->elms[cursor->index];
	++prune->stat_realignments;

	/*
	 * Align the create_tid.  By doing a reverse iteration we guarantee
	 * that all records after our current record have already been
	 * aligned, allowing us to safely correct the right-hand-boundary
	 * (because no record to our right that is otherwise exactly
	 * matching will have a create_tid to the left of our aligned
	 * create_tid).
	 */
	error = 0;
	if (realign_cre >= 0) {
		scan = &prune->elms[realign_cre];

		/*
		 * delta is the distance past the previous mod_tid boundary
		 * (relative to beg_tid); zero means already aligned.
		 * NOTE(review): mod_tid is used as a divisor with no zero
		 * check here; presumably validated by the caller -- confirm.
		 */
		delta = (elm->leaf.base.create_tid - scan->beg_tid) % 
			scan->mod_tid;
		if (delta) {
			/* round create_tid up to the next boundary */
			tid = elm->leaf.base.create_tid - delta + scan->mod_tid;

			/* can EDEADLK */
			error = hammer_btree_correct_rhb(cursor, tid + 1);
			if (error == 0) {
				error = hammer_btree_extract(cursor,
						     HAMMER_CURSOR_GET_LEAF);
			}
			if (error == 0) {
				/* can EDEADLK */
				error = hammer_cursor_upgrade(cursor);
			}
			if (error == 0) {
				hammer_modify_node(cursor->trans, cursor->node,
					    &elm->leaf.base.create_tid,
					    sizeof(elm->leaf.base.create_tid));
				elm->leaf.base.create_tid = tid;
				hammer_modify_node_done(cursor->node);
			}
		}
	}

	/*
	 * Align the delete_tid.  This only occurs if the record is
	 * historical, i.e. was deleted at some point.  Realigning the
	 * delete_tid does not move the record within the B-Tree but may
	 * cause it to temporarily overlap a record that has not yet been
	 * pruned.
	 */
	if (error == 0 && realign_del >= 0) {
		scan = &prune->elms[realign_del];

		/* same boundary computation as for create_tid above */
		delta = (elm->leaf.base.delete_tid - scan->beg_tid) % 
			scan->mod_tid;
		if (delta) {
			error = hammer_btree_extract(cursor,
						     HAMMER_CURSOR_GET_LEAF);
			if (error == 0) {
				hammer_modify_node(cursor->trans, cursor->node,
					    &elm->leaf.base.delete_tid,
					    sizeof(elm->leaf.base.delete_tid));
				/* round delete_tid up to the next boundary */
				elm->leaf.base.delete_tid =
					    elm->leaf.base.delete_tid -
					    delta + scan->mod_tid;
				hammer_modify_node_done(cursor->node);
			}
		}
	}
	return (error);
}
Ejemplo n.º 6
0
/*
 * Reblock a B-Tree internal node.  The parent must be adjusted to point to
 * the new copy of the internal node, and the node's children's parent
 * pointers must also be adjusted to point to the new copy.
 *
 * reblock - the reblock ioctl request (not referenced by this function)
 * cursor  - cursor whose ->node is the internal node being moved; on
 *	     success cursor->node is replaced by the new node
 * elm     - pointer to the parent element pointing at cursor.node, or
 *	     NULL when cursor.node is the root of the B-Tree
 *
 * Returns 0 on success, or the error from hammer_btree_lock_children()
 * or hammer_alloc_btree().  A failure while fixing up a child's parent
 * pointer panics.
 */
static int
hammer_reblock_int_node(struct hammer_ioc_reblock *reblock,
			 hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	struct hammer_node_lock lockroot;
	hammer_node_t onode;
	hammer_node_t nnode;
	int error;
	int i;

	/*
	 * Lock the children under cursor->node before touching anything.
	 * NOTE(review): the second argument (1) presumably limits the
	 * locking depth to the immediate children -- confirm against
	 * hammer_btree_lock_children().
	 */
	hammer_node_lock_init(&lockroot, cursor->node);
	error = hammer_btree_lock_children(cursor, 1, &lockroot, NULL);
	if (error)
		goto done;

	onode = cursor->node;
	nnode = hammer_alloc_btree(cursor->trans, 0, &error);

	/* on allocation failure, error is whatever hammer_alloc_btree stored */
	if (nnode == NULL)
		goto done;

	/*
	 * Move the node.  Adjust the parent's pointer to us first.
	 *
	 * The modification of nnode opened here stays open until the
	 * hammer_modify_node_done(nnode) call near the end of the function.
	 */
	hammer_lock_ex(&nnode->lock);
	hammer_modify_node_noundo(cursor->trans, nnode);
	bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk));

	if (elm) {
		/*
		 * We are not the root of the B-Tree
		 */
		hammer_modify_node(cursor->trans, cursor->parent,
				   &elm->internal.subtree_offset,
				   sizeof(elm->internal.subtree_offset));
		elm->internal.subtree_offset = nnode->node_offset;
		hammer_modify_node_done(cursor->parent);
	} else {
		/*
		 * We are the root of the B-Tree, so the reference to
		 * update is vol0_btree_root in the root volume.
		 */
                hammer_volume_t volume;
                        
                volume = hammer_get_root_volume(cursor->trans->hmp, &error);
                KKASSERT(error == 0);

                hammer_modify_volume_field(cursor->trans, volume,
					   vol0_btree_root);
                volume->ondisk->vol0_btree_root = nnode->node_offset;
                hammer_modify_volume_done(volume);
                hammer_rel_volume(volume, 0);
        }

	/*
	 * Now adjust our children's pointers to us.  Note that elm is
	 * reused as the loop variable from here on and no longer refers
	 * to the parent's element.
	 */
	for (i = 0; i < nnode->ondisk->count; ++i) {
		elm = &nnode->ondisk->elms[i];
		error = btree_set_parent(cursor->trans, nnode, elm);
		if (error)
			panic("reblock internal node: fixup problem");
	}

	/*
	 * Clean up.
	 *
	 * The new node replaces the current node in the cursor.  The cursor
	 * expects it to be locked so leave it locked.  Discard onode.
	 */
	hammer_cursor_replaced_node(onode, nnode);
	hammer_delete_node(cursor->trans, onode);

	/* optional trace output, enabled by bit 0x4000 of hammer_debug_general */
	if (hammer_debug_general & 0x4000) {
		kprintf("REBLOCK INODE %016llx -> %016llx\n",
			(long long)onode->node_offset,
			(long long)nnode->node_offset);
	}
	hammer_modify_node_done(nnode);
	cursor->node = nnode;

	/* drop the lock and reference on the old node */
	hammer_unlock(&onode->lock);
	hammer_rel_node(onode);

done:
	/* reached on both success and failure */
	hammer_btree_unlock_children(cursor->trans->hmp, &lockroot, NULL);
	return (error);
}
Ejemplo n.º 7
0
/*
 * Reblock a B-Tree leaf node: allocate a new node, copy the old node's
 * on-disk image into it, and redirect the reference to the node.
 *
 * elm is the parent's element pointing at cursor->node, or NULL when
 * cursor->node is the root of the B-Tree, in which case the root
 * volume's vol0_btree_root is updated instead.
 *
 * On success cursor->node is replaced by the new node, which is left
 * exclusively locked; the old node is deleted, unlocked and released.
 * Returns the error value stored by hammer_alloc_btree(), or by
 * hammer_get_root_volume() on the root path.  The reblock argument is
 * not referenced.
 */
static int
hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock,
			 hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	hammer_volume_t root_vol;
	hammer_node_t onode;
	hammer_node_t nnode;
	int error;

	/*
	 * No allocation hint is given for the leaf; fills are done from
	 * the leaf upwards.
	 */
	onode = cursor->node;
	nnode = hammer_alloc_btree(cursor->trans, 0, &error);
	if (nnode == NULL)
		return (error);

	/*
	 * Copy the node into its new location.  The modification opened
	 * here is closed by hammer_modify_node_done(nnode) further down.
	 */
	hammer_lock_ex(&nnode->lock);
	hammer_modify_node_noundo(cursor->trans, nnode);
	bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk));

	if (elm == NULL) {
		/*
		 * Root of the B-Tree: update the root volume header.
		 */
		root_vol = hammer_get_root_volume(cursor->trans->hmp, &error);
		KKASSERT(error == 0);

		hammer_modify_volume_field(cursor->trans, root_vol,
					   vol0_btree_root);
		root_vol->ondisk->vol0_btree_root = nnode->node_offset;
		hammer_modify_volume_done(root_vol);
		hammer_rel_volume(root_vol, 0);
	} else {
		/*
		 * Not the root: update the parent's element.
		 */
		hammer_modify_node(cursor->trans, cursor->parent,
				   &elm->internal.subtree_offset,
				   sizeof(elm->internal.subtree_offset));
		elm->internal.subtree_offset = nnode->node_offset;
		hammer_modify_node_done(cursor->parent);
	}

	/*
	 * Swap the new node in for the old one and discard the old one.
	 */
	hammer_cursor_replaced_node(onode, nnode);
	hammer_delete_node(cursor->trans, onode);

	if (hammer_debug_general & 0x4000) {
		kprintf("REBLOCK LNODE %016llx -> %016llx\n",
			(long long)onode->node_offset,
			(long long)nnode->node_offset);
	}
	hammer_modify_node_done(nnode);
	cursor->node = nnode;

	hammer_unlock(&onode->lock);
	hammer_rel_node(onode);

	return (error);
}
Ejemplo n.º 8
0
/*
 * De-duplicate the data of two B-Tree records.
 *
 * The records keyed by dedup->elm1 and dedup->elm2 are looked up.  If
 * their data lies in a valid zone, in the same zone, has the same length
 * and compares equal byte-for-byte, the second record's data is freed
 * and its data_offset is rewritten to reference the first record's data.
 *
 * Returns EOPNOTSUPP when the filesystem version is below
 * HAMMER_VOL_VERSION_FIVE.  Returns 0 with one of the INVALID_ZONE,
 * CMP_FAILURE or UNDERFLOW flags set in dedup->head.flags when the pair
 * cannot be de-duplicated.  Any other non-zero return is an error from
 * cursor setup, lookup, extraction, lock upgrade or the blockmap; per
 * the inline comments the candidate then goes to the pass2 list.
 *
 * The ip argument is not referenced.
 */
int
hammer_ioc_dedup(hammer_transaction_t trans, hammer_inode_t ip,
		 struct hammer_ioc_dedup *dedup)
{
	struct hammer_cursor cursor1, cursor2;
	int error;
	int seq;

	/*
	 * De-dup needs at least a version 5 filesystem.
	 */
	if (trans->hmp->version < HAMMER_VOL_VERSION_FIVE) {
		kprintf("hammer: Filesystem must be upgraded to v5 "
			"before you can run dedup\n");
		return (EOPNOTSUPP); /* 95*/
	}

	/*
	 * Locate the first record.  An error here sends the candidate
	 * to the pass2 list.
	 */
	error = hammer_init_cursor(trans, &cursor1, NULL, NULL);
	if (error)
		goto done_cursor;
	cursor1.key_beg = dedup->elm1;
	cursor1.flags |= HAMMER_CURSOR_BACKEND;

	error = hammer_btree_lookup(&cursor1);
	if (error)
		goto done_cursor;
	error = hammer_btree_extract(&cursor1, HAMMER_CURSOR_GET_LEAF |
						HAMMER_CURSOR_GET_DATA);
	if (error)
		goto done_cursor;

	/*
	 * Locate the second record.  An error here sends the candidate
	 * to the pass2 list.
	 */
	error = hammer_init_cursor(trans, &cursor2, NULL, NULL);
	if (error)
		goto done_cursors;
	cursor2.key_beg = dedup->elm2;
	cursor2.flags |= HAMMER_CURSOR_BACKEND;

	error = hammer_btree_lookup(&cursor2);
	if (error)
		goto done_cursors;
	error = hammer_btree_extract(&cursor2, HAMMER_CURSOR_GET_LEAF |
						HAMMER_CURSOR_GET_DATA);
	if (error)
		goto done_cursors;

	/*
	 * Zone validation.  The other zones (BTREE or META) must never
	 * be de-duplicated or bad things will happen.
	 *
	 * Reported through the INVALID_ZONE flag with error = 0.
	 */
	error = validate_zone(cursor1.leaf->data_offset) +
			    validate_zone(cursor2.leaf->data_offset);
	if (error) {
		dedup->head.flags |= HAMMER_IOC_DEDUP_INVALID_ZONE;
		error = 0;
		goto done_cursors;
	}

	/*
	 * Comparison checks, cheapest first: the zones must match, the
	 * lengths must match, and finally the data must be identical
	 * byte-for-byte.  The byte comparison is only reached when the
	 * lengths are equal.
	 *
	 * Reported through the CMP_FAILURE flag with error = 0.
	 */
	if ((cursor1.leaf->data_offset & HAMMER_OFF_ZONE_MASK) !=
	    (cursor2.leaf->data_offset & HAMMER_OFF_ZONE_MASK) ||
	    cursor1.leaf->data_len != cursor2.leaf->data_len ||
	    bcmp(cursor1.data, cursor2.data, cursor1.leaf->data_len)) {
		dedup->head.flags |= HAMMER_IOC_DEDUP_CMP_FAILURE;
		goto done_cursors;
	}

	/*
	 * Upgrade both cursors together to an exclusive lock.  An error
	 * here sends the candidate to the pass2 list.
	 */
	hammer_sync_lock_sh(trans);
	error = hammer_cursor_upgrade2(&cursor1, &cursor2);
	if (error) {
		hammer_sync_unlock(trans);
		goto done_cursors;
	}

	error = hammer_blockmap_dedup(cursor1.trans,
			cursor1.leaf->data_offset, cursor1.leaf->data_len);
	if (error) {
		/*
		 * ERANGE is reported through the UNDERFLOW flag with
		 * error = 0.  Any other error is returned as-is and the
		 * block goes to the pass2 list.
		 */
		if (error == ERANGE) {
			dedup->head.flags |= HAMMER_IOC_DEDUP_UNDERFLOW;
			error = 0;
		}
		goto downgrade_cursors;
	}

	/*
	 * cursor2's cache must be invalidated before calling
	 * hammer_blockmap_free(), otherwise it will not be able to
	 * invalidate the underlying data buffer.
	 */
	hammer_cursor_invalidate_cache(&cursor2);
	hammer_blockmap_free(cursor2.trans,
			cursor2.leaf->data_offset, cursor2.leaf->data_len);

	/*
	 * Make the second record share the first record's data.
	 */
	hammer_modify_node(cursor2.trans, cursor2.node,
			&cursor2.leaf->data_offset, sizeof(hammer_off_t));
	cursor2.leaf->data_offset = cursor1.leaf->data_offset;
	hammer_modify_node_done(cursor2.node);

downgrade_cursors:
	hammer_cursor_downgrade2(&cursor1, &cursor2);
	hammer_sync_unlock(trans);
done_cursors:
	hammer_done_cursor(&cursor2);
done_cursor:
	hammer_done_cursor(&cursor1);

	/*
	 * Avoid deadlocking the buffer cache: wait on the flusher while
	 * it reports the meta half-limit reached or the undo space
	 * exhausted.
	 */
	seq = trans->hmp->flusher.done;
	while (hammer_flusher_meta_halflimit(trans->hmp) ||
	       hammer_flusher_undo_exhausted(trans, 2)) {
		hammer_flusher_wait(trans->hmp, seq);
		seq = hammer_flusher_async_one(trans->hmp);
	}
	return (error);
}