/*
 * Relocate a record's data to freshly allocated storage.  Both the
 * B-Tree leaf element and the record's pointer to the data must be
 * adjusted to reference the new location.
 */
static int
hammer_reblock_data(struct hammer_ioc_reblock *reblock,
		    hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	hammer_buffer_t dbuf = NULL;
	hammer_off_t ooff;
	hammer_off_t noff;
	void *nptr;
	int error;

	error = hammer_btree_extract_data(cursor);
	if (error)
		return (error);

	/*
	 * Allocate the replacement data storage.  No locality hint is
	 * supplied (0).
	 */
	nptr = hammer_alloc_data(cursor->trans, elm->leaf.data_len,
				 elm->leaf.base.rec_type,
				 &noff, &dbuf,
				 0, &error);
	if (error)
		goto done;
	hammer_io_notmeta(dbuf);

	/*
	 * Copy the data into place.  The cursor's cached data buffer
	 * must be invalidated before calling blockmap_free.  The
	 * blockmap_free may release the entire big-block and will not
	 * be able to invalidate it if the cursor is still holding a
	 * data buffer cached within that big-block.
	 */
	hammer_modify_buffer_noundo(cursor->trans, dbuf);
	bcopy(cursor->data, nptr, elm->leaf.data_len);
	hammer_modify_buffer_done(dbuf);
	hammer_cursor_invalidate_cache(cursor);

	hammer_blockmap_free(cursor->trans,
			     elm->leaf.data_offset, elm->leaf.data_len);

	/* swing the leaf element's data pointer to the new location */
	hammer_modify_node(cursor->trans, cursor->node,
			   &elm->leaf.data_offset, sizeof(hammer_off_t));
	ooff = elm->leaf.data_offset;
	elm->leaf.data_offset = noff;
	hammer_modify_node_done(cursor->node);

	if (hammer_debug_general & 0x4000) {
		hdkprintf("%08x %016jx -> %016jx\n",
			(elm ? elm->base.localization : -1),
			(intmax_t)ooff,
			(intmax_t)noff);
	}
done:
	if (dbuf)
		hammer_rel_buffer(dbuf, 0);
	return (error);
}
/*
 * nnode is a newly allocated node, and now elm becomes the node
 * element within nnode's parent that represents a pointer to nnode,
 * or nnode becomes the root node if elm does not exist.
 */
static void
hammer_move_node(hammer_cursor_t cursor, hammer_btree_elm_t elm,
		 hammer_node_t onode, hammer_node_t nnode)
{
	int error;
	int i;

	/* carry over the old node's on-disk image verbatim */
	bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk));

	if (elm) {
		/*
		 * Not the root of the B-Tree: repoint the parent's
		 * element at the new node.
		 */
		KKASSERT(hammer_is_internal_node_elm(elm));
		hammer_modify_node(cursor->trans, cursor->parent,
		    &elm->internal.subtree_offset,
		    sizeof(elm->internal.subtree_offset));
		elm->internal.subtree_offset = nnode->node_offset;
		hammer_modify_node_done(cursor->parent);
	} else {
		/*
		 * Root of the B-Tree: update the volume header's
		 * root offset instead.
		 */
		hammer_volume_t volume;

		volume = hammer_get_root_volume(cursor->trans->hmp, &error);
		KKASSERT(error == 0);

		hammer_modify_volume_field(cursor->trans, volume,
		    vol0_btree_root);
		volume->ondisk->vol0_btree_root = nnode->node_offset;
		hammer_modify_volume_done(volume);
		hammer_rel_volume(volume, 0);
	}

	/*
	 * An internal node must also repoint each child's parent link
	 * back at the new copy.
	 */
	if (nnode->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL) {
		for (i = 0; i < nnode->ondisk->count; ++i) {
			error = btree_set_parent_of_child(cursor->trans,
			    nnode, &nnode->ondisk->elms[i]);
			if (error)
				hpanic("reblock internal node: fixup problem");
		}
	}
}
/*
 * Reblock a record's data.  Both the B-Tree element and record pointers
 * to the data must be adjusted.
 *
 * Returns 0 on success, or an error from the B-Tree extract or the
 * data allocation; on any failure the leaf element is left untouched.
 */
static int
hammer_reblock_data(struct hammer_ioc_reblock *reblock,
		    hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	struct hammer_buffer *data_buffer = NULL;
	hammer_off_t ndata_offset;
	int error;
	void *ndata;

	/* pull both the leaf and its data into the cursor */
	error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA |
					     HAMMER_CURSOR_GET_LEAF);
	if (error)
		return (error);
	/* allocate replacement storage; trailing 0 is the locality hint */
	ndata = hammer_alloc_data(cursor->trans, elm->leaf.data_len,
				  elm->leaf.base.rec_type,
				  &ndata_offset, &data_buffer,
				  0, &error);
	if (error)
		goto done;
	hammer_io_notmeta(data_buffer);

	/*
	 * Move the data.  Note that we must invalidate any cached
	 * data buffer in the cursor before calling blockmap_free.
	 * The blockmap_free may free up the entire large-block and
	 * will not be able to invalidate it if the cursor is holding
	 * a data buffer cached in that large block.
	 */
	hammer_modify_buffer(cursor->trans, data_buffer, NULL, 0);
	bcopy(cursor->data, ndata, elm->leaf.data_len);
	hammer_modify_buffer_done(data_buffer);
	hammer_cursor_invalidate_cache(cursor);

	/* release the old data storage */
	hammer_blockmap_free(cursor->trans,
			     elm->leaf.data_offset, elm->leaf.data_len);

	/* swing the leaf element's data pointer to the new location */
	hammer_modify_node(cursor->trans, cursor->node,
			   &elm->leaf.data_offset, sizeof(hammer_off_t));
	elm->leaf.data_offset = ndata_offset;
	hammer_modify_node_done(cursor->node);

done:
	if (data_buffer)
		hammer_rel_buffer(data_buffer, 0);
	return (error);
}
/*
 * Reblock a record's data.  Both the B-Tree element and record pointers
 * to the data must be adjusted.
 *
 * Returns 0 on success, or an error from the B-Tree extract or the
 * data allocation; on any failure the leaf element is left untouched.
 */
static int
hammer_reblock_data(struct hammer_ioc_reblock *reblock,
		    hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	struct hammer_buffer *data_buffer = NULL;
	hammer_off_t ndata_offset;
	int error;
	void *ndata;

	/* pull both the leaf and its data into the cursor */
	error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA |
					     HAMMER_CURSOR_GET_LEAF);
	if (error)
		return (error);
	ndata = hammer_alloc_data(cursor->trans, elm->leaf.data_len,
				  elm->leaf.base.rec_type,
				  &ndata_offset, &data_buffer, &error);
	if (error)
		goto done;

	/*
	 * Move the data.  The cursor's cached data buffer must be
	 * invalidated before calling blockmap_free.  The blockmap_free
	 * may free up the entire big-block and will not be able to
	 * invalidate it if the cursor is still holding a data buffer
	 * cached in that big-block (the other reblock_data variants in
	 * this file perform the same invalidation for this reason).
	 */
	hammer_modify_buffer(cursor->trans, data_buffer, NULL, 0);
	bcopy(cursor->data, ndata, elm->leaf.data_len);
	hammer_modify_buffer_done(data_buffer);
	hammer_cursor_invalidate_cache(cursor);

	/* release the old data storage */
	hammer_blockmap_free(cursor->trans,
			     elm->leaf.data_offset, elm->leaf.data_len);

	/* swing the leaf element's data pointer to the new location */
	hammer_modify_node(cursor->trans, cursor->node,
			   &elm->leaf.data_offset, sizeof(hammer_off_t));
	elm->leaf.data_offset = ndata_offset;
	hammer_modify_node_done(cursor->node);

done:
	if (data_buffer)
		hammer_rel_buffer(data_buffer, 0);
	return (error);
}
/*
 * NOTE: THIS CODE HAS BEEN REMOVED!  Pruning no longer attempts to realign
 *	 adjacent records because it seriously interferes with every
 *	 mirroring algorithm I could come up with.
 *
 *	 This means that historical accesses beyond the first snapshot
 *	 softlink should be on snapshot boundaries only.  Historical
 *	 accesses from "now" to the first snapshot softlink continue to
 *	 be fine-grained.
 *
 * NOTE: It also looks like there's a bug in the removed code.  It is believed
 *	 that create_tid can sometimes get set to 0xffffffffffffffff.  Just as
 *	 well we no longer try to do this fancy shit.  Probably the attempt to
 *	 correct the rhb is blowing up the cursor's indexing or addressing
 *	 mapping.
 *
 * Align the record to cover any gaps created through the deletion of
 * records within the pruning space.  If we were to just delete the records
 * there would be gaps which in turn would cause a snapshot that is NOT on
 * a pruning boundary to appear corrupt to the user.  Forcing alignment
 * of the create_tid and delete_tid for retained records 'reconnects'
 * the previously contiguous space, making it contiguous again after the
 * deletions.
 *
 * The use of a reverse iteration allows us to safely align the records and
 * related elements without creating temporary overlaps.  XXX we should
 * add ordering dependencies for record buffers to guarantee consistency
 * during recovery.
 */
static int
realign_prune(struct hammer_ioc_prune *prune,
	      hammer_cursor_t cursor, int realign_cre, int realign_del)
{
	struct hammer_ioc_prune_elm *scan;
	hammer_btree_elm_t elm;
	hammer_tid_t delta;
	hammer_tid_t tid;
	int error;

	hammer_cursor_downgrade(cursor);

	elm = &cursor->node->ondisk->elms[cursor->index];
	++prune->stat_realignments;

	/*
	 * Align the create_tid.  By doing a reverse iteration we guarantee
	 * that all records after our current record have already been
	 * aligned, allowing us to safely correct the right-hand-boundary
	 * (because no otherwise-matching record to our right will have a
	 * create_tid to the left of our aligned create_tid).
	 */
	error = 0;
	if (realign_cre >= 0) {
		scan = &prune->elms[realign_cre];
		/* distance past the previous mod_tid boundary */
		delta = (elm->leaf.base.create_tid - scan->beg_tid) %
			scan->mod_tid;
		if (delta) {
			/* round create_tid up to the next boundary */
			tid = elm->leaf.base.create_tid - delta + scan->mod_tid;

			/* can EDEADLK */
			error = hammer_btree_correct_rhb(cursor, tid + 1);
			if (error == 0) {
				error = hammer_btree_extract(cursor,
						     HAMMER_CURSOR_GET_LEAF);
			}
			if (error == 0) {
				/* can EDEADLK */
				error = hammer_cursor_upgrade(cursor);
			}
			if (error == 0) {
				hammer_modify_node(cursor->trans, cursor->node,
					    &elm->leaf.base.create_tid,
					    sizeof(elm->leaf.base.create_tid));
				elm->leaf.base.create_tid = tid;
				hammer_modify_node_done(cursor->node);
			}
		}
	}

	/*
	 * Align the delete_tid.  This only occurs if the record is historical
	 * and was deleted at some point.  Realigning the delete_tid does not
	 * move the record within the B-Tree but may cause it to temporarily
	 * overlap a record that has not yet been pruned.
	 */
	if (error == 0 && realign_del >= 0) {
		scan = &prune->elms[realign_del];
		delta = (elm->leaf.base.delete_tid - scan->beg_tid) %
			scan->mod_tid;
		if (delta) {
			error = hammer_btree_extract(cursor,
						     HAMMER_CURSOR_GET_LEAF);
			if (error == 0) {
				hammer_modify_node(cursor->trans, cursor->node,
					    &elm->leaf.base.delete_tid,
					    sizeof(elm->leaf.base.delete_tid));
				/* round delete_tid up to the next boundary */
				elm->leaf.base.delete_tid =
					    elm->leaf.base.delete_tid -
					    delta + scan->mod_tid;
				hammer_modify_node_done(cursor->node);
			}
		}
	}
	return (error);
}
/*
 * Reblock a B-Tree internal node.  The parent must be adjusted to point to
 * the new copy of the internal node, and the node's children's parent
 * pointers must also be adjusted to point to the new copy.
 *
 * elm is a pointer to the parent element pointing at cursor.node.
 */
static int
hammer_reblock_int_node(struct hammer_ioc_reblock *reblock,
			hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	struct hammer_node_lock lockroot;
	hammer_node_t onode;
	hammer_node_t nnode;
	int error;
	int i;

	/*
	 * Lock the node's children so their parent pointers can be
	 * safely rewritten below.
	 */
	hammer_node_lock_init(&lockroot, cursor->node);
	error = hammer_btree_lock_children(cursor, 1, &lockroot, NULL);
	if (error)
		goto done;

	onode = cursor->node;
	nnode = hammer_alloc_btree(cursor->trans, 0, &error);
	if (nnode == NULL)
		goto done;

	/*
	 * Move the node.  Adjust the parent's pointer to us first.
	 */
	hammer_lock_ex(&nnode->lock);
	hammer_modify_node_noundo(cursor->trans, nnode);
	bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk));

	if (elm) {
		/*
		 * We are not the root of the B-Tree
		 */
		hammer_modify_node(cursor->trans, cursor->parent,
				   &elm->internal.subtree_offset,
				   sizeof(elm->internal.subtree_offset));
		elm->internal.subtree_offset = nnode->node_offset;
		hammer_modify_node_done(cursor->parent);
	} else {
		/*
		 * We are the root of the B-Tree
		 */
		hammer_volume_t volume;

		volume = hammer_get_root_volume(cursor->trans->hmp, &error);
		KKASSERT(error == 0);

		hammer_modify_volume_field(cursor->trans, volume,
					   vol0_btree_root);
		volume->ondisk->vol0_btree_root = nnode->node_offset;
		hammer_modify_volume_done(volume);
		hammer_rel_volume(volume, 0);
	}

	/*
	 * Now adjust our children's pointers to us.
	 * NOTE: elm is reused here as a loop cursor over the new node's
	 * elements; the original parent element is no longer needed.
	 */
	for (i = 0; i < nnode->ondisk->count; ++i) {
		elm = &nnode->ondisk->elms[i];
		error = btree_set_parent(cursor->trans, nnode, elm);
		if (error)
			panic("reblock internal node: fixup problem");
	}

	/*
	 * Clean up.
	 *
	 * The new node replaces the current node in the cursor.  The cursor
	 * expects it to be locked so leave it locked.  Discard onode.
	 */
	hammer_cursor_replaced_node(onode, nnode);
	hammer_delete_node(cursor->trans, onode);
	if (hammer_debug_general & 0x4000) {
		kprintf("REBLOCK INODE %016llx -> %016llx\n",
			(long long)onode->node_offset,
			(long long)nnode->node_offset);
	}
	hammer_modify_node_done(nnode);
	cursor->node = nnode;

	hammer_unlock(&onode->lock);
	hammer_rel_node(onode);

done:
	hammer_btree_unlock_children(cursor->trans->hmp, &lockroot, NULL);
	return (error);
}
/*
 * Reblock a B-Tree leaf node.  The parent must be adjusted to point to
 * the new copy of the leaf node.
 *
 * elm is a pointer to the parent element pointing at cursor.node.
 */
static int
hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock,
			 hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	hammer_node_t old_node;
	hammer_node_t new_node;
	int rc;

	/*
	 * Don't supply a hint when allocating the leaf.  Fills are done
	 * from the leaf upwards.
	 */
	old_node = cursor->node;
	new_node = hammer_alloc_btree(cursor->trans, 0, &rc);
	if (new_node == NULL)
		return (rc);

	/*
	 * Copy the node contents, then repoint whatever references the
	 * old node at the new one.
	 */
	hammer_lock_ex(&new_node->lock);
	hammer_modify_node_noundo(cursor->trans, new_node);
	bcopy(old_node->ondisk, new_node->ondisk, sizeof(*new_node->ondisk));

	if (elm == NULL) {
		/*
		 * We are the root of the B-Tree: update the volume
		 * header's root offset.
		 */
		hammer_volume_t volume;

		volume = hammer_get_root_volume(cursor->trans->hmp, &rc);
		KKASSERT(rc == 0);

		hammer_modify_volume_field(cursor->trans, volume,
					   vol0_btree_root);
		volume->ondisk->vol0_btree_root = new_node->node_offset;
		hammer_modify_volume_done(volume);
		hammer_rel_volume(volume, 0);
	} else {
		/*
		 * We are not the root of the B-Tree: repoint the
		 * parent's element at the new node.
		 */
		hammer_modify_node(cursor->trans, cursor->parent,
				   &elm->internal.subtree_offset,
				   sizeof(elm->internal.subtree_offset));
		elm->internal.subtree_offset = new_node->node_offset;
		hammer_modify_node_done(cursor->parent);
	}

	/* swap the cursor over to the new node and retire the old one */
	hammer_cursor_replaced_node(old_node, new_node);
	hammer_delete_node(cursor->trans, old_node);

	if (hammer_debug_general & 0x4000) {
		kprintf("REBLOCK LNODE %016llx -> %016llx\n",
			(long long)old_node->node_offset,
			(long long)new_node->node_offset);
	}
	hammer_modify_node_done(new_node);
	cursor->node = new_node;

	hammer_unlock(&old_node->lock);
	hammer_rel_node(old_node);

	return (rc);
}
/*
 * De-duplicate two identical data records: point elm2's leaf at elm1's
 * data storage and free elm2's own copy.
 *
 * Soft failures (wrong zone, comparison mismatch, reference-count
 * underflow) return 0 with a flag set in dedup->head.flags; hard
 * failures return an error so the caller can queue the candidate for
 * a second pass.
 */
int
hammer_ioc_dedup(hammer_transaction_t trans, hammer_inode_t ip,
		 struct hammer_ioc_dedup *dedup)
{
	struct hammer_cursor cursor1, cursor2;
	int error;
	int seq;

	/*
	 * Enforce hammer filesystem version requirements
	 */
	if (trans->hmp->version < HAMMER_VOL_VERSION_FIVE) {
		kprintf("hammer: Filesystem must be upgraded to v5 "
			"before you can run dedup\n");
		return (EOPNOTSUPP);	/* 95*/
	}

	/*
	 * Cursor1, return an error -> candidate goes to pass2 list
	 */
	error = hammer_init_cursor(trans, &cursor1, NULL, NULL);
	if (error)
		goto done_cursor;
	cursor1.key_beg = dedup->elm1;
	cursor1.flags |= HAMMER_CURSOR_BACKEND;

	error = hammer_btree_lookup(&cursor1);
	if (error)
		goto done_cursor;
	error = hammer_btree_extract(&cursor1, HAMMER_CURSOR_GET_LEAF |
						HAMMER_CURSOR_GET_DATA);
	if (error)
		goto done_cursor;

	/*
	 * Cursor2, return an error -> candidate goes to pass2 list
	 */
	error = hammer_init_cursor(trans, &cursor2, NULL, NULL);
	if (error)
		goto done_cursors;
	cursor2.key_beg = dedup->elm2;
	cursor2.flags |= HAMMER_CURSOR_BACKEND;

	error = hammer_btree_lookup(&cursor2);
	if (error)
		goto done_cursors;
	error = hammer_btree_extract(&cursor2, HAMMER_CURSOR_GET_LEAF |
						HAMMER_CURSOR_GET_DATA);
	if (error)
		goto done_cursors;

	/*
	 * Zone validation.  We can't de-dup any of the other zones
	 * (BTREE or META) or bad things will happen.
	 *
	 * Return with error = 0, but set an INVALID_ZONE flag.
	 */
	error = validate_zone(cursor1.leaf->data_offset) +
		validate_zone(cursor2.leaf->data_offset);
	if (error) {
		dedup->head.flags |= HAMMER_IOC_DEDUP_INVALID_ZONE;
		error = 0;
		goto done_cursors;
	}

	/*
	 * Comparison checks
	 *
	 * If zones don't match or data_len fields aren't the same
	 * we consider it to be a comparison failure.
	 *
	 * Return with error = 0, but set a CMP_FAILURE flag.
	 */
	if ((cursor1.leaf->data_offset & HAMMER_OFF_ZONE_MASK) !=
	    (cursor2.leaf->data_offset & HAMMER_OFF_ZONE_MASK)) {
		dedup->head.flags |= HAMMER_IOC_DEDUP_CMP_FAILURE;
		goto done_cursors;
	}
	if (cursor1.leaf->data_len != cursor2.leaf->data_len) {
		dedup->head.flags |= HAMMER_IOC_DEDUP_CMP_FAILURE;
		goto done_cursors;
	}

	/* byte-by-byte comparison to be sure */
	if (bcmp(cursor1.data, cursor2.data, cursor1.leaf->data_len)) {
		dedup->head.flags |= HAMMER_IOC_DEDUP_CMP_FAILURE;
		goto done_cursors;
	}

	/*
	 * Upgrade both cursors together to an exclusive lock
	 *
	 * Return an error -> candidate goes to pass2 list
	 */
	hammer_sync_lock_sh(trans);
	error = hammer_cursor_upgrade2(&cursor1, &cursor2);
	if (error) {
		hammer_sync_unlock(trans);
		goto done_cursors;
	}

	/* bump the reference count on the surviving (elm1) data */
	error = hammer_blockmap_dedup(cursor1.trans,
			cursor1.leaf->data_offset, cursor1.leaf->data_len);
	if (error) {
		if (error == ERANGE) {
			/*
			 * Return with error = 0, but set an UNDERFLOW flag
			 */
			dedup->head.flags |= HAMMER_IOC_DEDUP_UNDERFLOW;
			error = 0;
			goto downgrade_cursors;
		} else {
			/*
			 * Return an error -> block goes to pass2 list
			 */
			goto downgrade_cursors;
		}
	}

	/*
	 * The cursor2's cache must be invalidated before calling
	 * hammer_blockmap_free(), otherwise it will not be able to
	 * invalidate the underlying data buffer.
	 */
	hammer_cursor_invalidate_cache(&cursor2);
	hammer_blockmap_free(cursor2.trans,
			cursor2.leaf->data_offset, cursor2.leaf->data_len);

	/* repoint elm2's leaf at elm1's data */
	hammer_modify_node(cursor2.trans, cursor2.node,
			&cursor2.leaf->data_offset, sizeof(hammer_off_t));
	cursor2.leaf->data_offset = cursor1.leaf->data_offset;
	hammer_modify_node_done(cursor2.node);

downgrade_cursors:
	hammer_cursor_downgrade2(&cursor1, &cursor2);
	hammer_sync_unlock(trans);
done_cursors:
	hammer_done_cursor(&cursor2);
done_cursor:
	hammer_done_cursor(&cursor1);

	/*
	 * Avoid deadlocking the buffer cache
	 */
	seq = trans->hmp->flusher.done;
	while (hammer_flusher_meta_halflimit(trans->hmp) ||
	       hammer_flusher_undo_exhausted(trans, 2)) {
		hammer_flusher_wait(trans->hmp, seq);
		seq = hammer_flusher_async_one(trans->hmp);
	}
	return (error);
}