/*
 * Reblock a B-Tree internal node.  The parent must be adjusted to point to
 * the new copy of the internal node, and the node's children's parent
 * pointers must also be adjusted to point to the new copy.
 *
 * elm is a pointer to the parent element pointing at cursor.node.
 */
static int
hammer_reblock_int_node(struct hammer_ioc_reblock *reblock,
			hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	struct hammer_node_lock lockroot;
	hammer_node_t onode;
	hammer_node_t nnode;
	int error;

	hammer_node_lock_init(&lockroot, cursor->node);
	error = hammer_btree_lock_children(cursor, 1, &lockroot, NULL);
	if (error)
		goto done;

	/*
	 * Don't supply a hint when allocating the leaf.  Fills are done
	 * from the leaf upwards.
	 */
	onode = cursor->node;
	nnode = hammer_alloc_btree(cursor->trans, 0, &error);
	if (nnode == NULL)
		goto done;

	hammer_lock_ex(&nnode->lock);
	hammer_modify_node_noundo(cursor->trans, nnode);

	hammer_move_node(cursor, elm, onode, nnode);

	/*
	 * Clean up.
	 *
	 * The new node replaces the current node in the cursor.  The cursor
	 * expects it to be locked so leave it locked.  Discard onode.
	 */
	hammer_cursor_replaced_node(onode, nnode);
	hammer_delete_node(cursor->trans, onode);

	if (hammer_debug_general & 0x4000) {
		hdkprintf("%08x %016jx -> %016jx\n",
			(elm ? elm->base.localization : -1),
			(intmax_t)onode->node_offset,
			(intmax_t)nnode->node_offset);
	}
	hammer_modify_node_done(nnode);
	cursor->node = nnode;

	hammer_unlock(&onode->lock);
	hammer_rel_node(onode);

done:
	hammer_btree_unlock_children(cursor->trans->hmp, &lockroot, NULL);
	return (error);
}
/*
 * Remove a volume.
 */
int
hammer_ioc_volume_del(hammer_transaction_t trans, hammer_inode_t ip,
		struct hammer_ioc_volume *ioc)
{
	hammer_mount_t hmp = trans->hmp;
	struct mount *mp = hmp->mp;
	struct hammer_volume_ondisk ondisk;
	hammer_volume_t volume;
	int64_t total_bigblocks, empty_bigblocks;
	int vol_no;
	int error = 0;

	if (mp->mnt_flag & MNT_RDONLY) {
		hmkprintf(hmp, "Cannot del volume from read-only HAMMER filesystem\n");
		return (EINVAL);
	}

	if (hammer_lock_ex_try(&hmp->volume_lock) != 0) {
		hmkprintf(hmp, "Another volume operation is in progress!\n");
		return (EAGAIN);
	}

	if (hmp->nvolumes <= 1) {
		hammer_unlock(&hmp->volume_lock);
		hmkprintf(hmp, "No HAMMER volume to delete\n");
		return (EINVAL);
	}

	/*
	 * find volume by volname
	 */
	volume = NULL;
	HAMMER_VOLUME_NUMBER_FOREACH(hmp, vol_no) {
		volume = hammer_get_volume(hmp, vol_no, &error);
		KKASSERT(volume != NULL && error == 0);
		if (strcmp(volume->vol_name, ioc->device_name) == 0) {
			break;
		}
		hammer_rel_volume(volume, 0);
		volume = NULL;
	}
int
hammer_ioc_volume_add(hammer_transaction_t trans, hammer_inode_t ip,
		struct hammer_ioc_volume *ioc)
{
	hammer_mount_t hmp = trans->hmp;
	struct mount *mp = hmp->mp;
	struct hammer_volume_ondisk ondisk;
	hammer_volume_t volume;
	int64_t total_bigblocks, empty_bigblocks;
	int free_vol_no = 0;
	int error;

	if (mp->mnt_flag & MNT_RDONLY) {
		hmkprintf(hmp, "Cannot add volume to read-only HAMMER filesystem\n");
		return (EINVAL);
	}

	if (hammer_lock_ex_try(&hmp->volume_lock) != 0) {
		hmkprintf(hmp, "Another volume operation is in progress!\n");
		return (EAGAIN);
	}

	if (hmp->nvolumes >= HAMMER_MAX_VOLUMES) {
		hammer_unlock(&hmp->volume_lock);
		hmkprintf(hmp, "Max number of HAMMER volumes exceeded\n");
		return (EINVAL);
	}

	/*
	 * Find an unused volume number.
	 */
	while (free_vol_no < HAMMER_MAX_VOLUMES &&
	       hammer_volume_number_test(hmp, free_vol_no)) {
		++free_vol_no;
	}
	if (free_vol_no >= HAMMER_MAX_VOLUMES) {
		hmkprintf(hmp, "Max number of HAMMER volumes exceeded\n");
		error = EINVAL;
		goto end;
	}

	error = hammer_format_volume_header(hmp, ioc, &ondisk, free_vol_no);
	if (error)
		goto end;

	error = hammer_install_volume(hmp, ioc->device_name, NULL, &ondisk);
	if (error)
		goto end;

	hammer_sync_lock_sh(trans);
	hammer_lock_ex(&hmp->blkmap_lock);

	volume = hammer_get_volume(hmp, free_vol_no, &error);
	KKASSERT(volume != NULL && error == 0);

	error = hammer_format_freemap(trans, volume);
	KKASSERT(error == 0);

	error = hammer_count_bigblocks(hmp, volume,
			&total_bigblocks, &empty_bigblocks);
	KKASSERT(error == 0);
	KKASSERT(total_bigblocks == empty_bigblocks);

	hammer_rel_volume(volume, 0);

	++hmp->nvolumes;
	error = hammer_update_volumes_header(trans,
			total_bigblocks, empty_bigblocks);
	KKASSERT(error == 0);

	hammer_unlock(&hmp->blkmap_lock);
	hammer_sync_unlock(trans);

	KKASSERT(error == 0);
end:
	hammer_unlock(&hmp->volume_lock);
	if (error)
		hmkprintf(hmp, "An error occurred: %d\n", error);
	return (error);
}
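/*
 * Illustrative sketch, not part of the original source: userland drives the
 * path above through the volume ioctl.  Assuming the HAMMERIOC_ADD_VOLUME
 * request and the hammer_ioc_volume layout used by the kernel code above,
 * a minimal caller looks roughly like:
 *
 *	struct hammer_ioc_volume ioc;
 *
 *	bzero(&ioc, sizeof(ioc));
 *	strlcpy(ioc.device_name, "/dev/da1s1a", sizeof(ioc.device_name));
 *	if (ioctl(fd, HAMMERIOC_ADD_VOLUME, &ioc) < 0)
 *		err(1, "volume-add");
 *
 * The kernel then formats the new volume header, installs the volume, and
 * formats its freemap under the sync and blkmap locks as shown above.
 */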
/*
 * HAMMER version 4+ REDO support.
 *
 * REDO records are used to improve fsync() performance.  Instead of having
 * to go through a complete double-flush cycle involving at least two disk
 * synchronizations the fsync need only flush UNDO/REDO FIFO buffers through
 * the related REDO records, which is a single synchronization requiring
 * no track seeking.  If a recovery becomes necessary the recovery code
 * will generate logical data writes based on the REDO records encountered.
 * That is, the recovery code will UNDO any partial meta-data/data writes
 * at the raw disk block level and then REDO the data writes at the logical
 * level.
 */
int
hammer_generate_redo(hammer_transaction_t trans, hammer_inode_t ip,
		     hammer_off_t file_off, u_int32_t flags,
		     void *base, int len)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t undomap;
	hammer_buffer_t buffer = NULL;
	hammer_fifo_redo_t redo;
	hammer_fifo_tail_t tail;
	hammer_off_t next_offset;
	int error;
	int bytes;
	int n;

	/*
	 * Setup
	 */
	hmp = trans->hmp;
	root_volume = trans->rootvol;
	undomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];

	/*
	 * No undo recursion when modifying the root volume
	 */
	hammer_modify_volume(NULL, root_volume, NULL, 0);
	hammer_lock_ex(&hmp->undo_lock);

	/* undo had better not roll over (loose test) */
	if (hammer_undo_space(trans) < len + HAMMER_BUFSIZE*3)
		panic("hammer: insufficient undo FIFO space!");

	/*
	 * Loop until the undo for the entire range has been laid down.
	 * Loop at least once (len might be 0 as a degenerate case).
	 */
	for (;;) {
		/*
		 * Fetch the layout offset in the UNDO FIFO, wrap it as
		 * necessary.
		 */
		if (undomap->next_offset == undomap->alloc_offset) {
			undomap->next_offset =
				HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0);
		}
		next_offset = undomap->next_offset;

		/*
		 * This is a tail-chasing FIFO, when we hit the start of a new
		 * buffer we don't have to read it in.
		 */
		if ((next_offset & HAMMER_BUFMASK) == 0) {
			redo = hammer_bnew(hmp, next_offset, &error, &buffer);
			hammer_format_undo(redo, hmp->undo_seqno ^ 0x40000000);
		} else {
			redo = hammer_bread(hmp, next_offset, &error, &buffer);
		}
		if (error)
			break;
		hammer_modify_buffer(NULL, buffer, NULL, 0);

		/*
		 * Calculate how big a media structure fits up to the next
		 * alignment point and how large a data payload we can
		 * accommodate.
		 *
		 * If n calculates to 0 or negative there is no room for
		 * anything but a PAD.
		 */
		bytes = HAMMER_UNDO_ALIGN -
			((int)next_offset & HAMMER_UNDO_MASK);
		n = bytes -
		    (int)sizeof(struct hammer_fifo_redo) -
		    (int)sizeof(struct hammer_fifo_tail);

		/*
		 * If available space is insufficient for any payload
		 * we have to lay down a PAD.
		 *
		 * The minimum PAD is 8 bytes and the head and tail will
		 * overlap each other in that case.  PADs do not have
		 * sequence numbers or CRCs.
		 *
		 * A PAD may not start on a boundary.  That is, every
		 * 512-byte block in the UNDO/REDO FIFO must begin with
		 * a record containing a sequence number.
		 */
		if (n <= 0) {
			KKASSERT(bytes >= sizeof(struct hammer_fifo_tail));
			KKASSERT(((int)next_offset & HAMMER_UNDO_MASK) != 0);
			tail = (void *)((char *)redo + bytes - sizeof(*tail));
			if ((void *)redo != (void *)tail) {
				tail->tail_signature = HAMMER_TAIL_SIGNATURE;
				tail->tail_type = HAMMER_HEAD_TYPE_PAD;
				tail->tail_size = bytes;
			}
			redo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
			redo->head.hdr_type = HAMMER_HEAD_TYPE_PAD;
			redo->head.hdr_size = bytes;
			/* NO CRC OR SEQ NO */
			undomap->next_offset += bytes;
			hammer_modify_buffer_done(buffer);
			hammer_stats_redo += bytes;
			continue;
		}

		/*
		 * When generating an inode-related REDO record we track
		 * the point in the UNDO/REDO FIFO containing the inode's
		 * earliest REDO record.  See hammer_generate_redo_sync().
		 *
		 * redo_fifo_next is cleared when an inode is staged to
		 * the backend and then used to determine how to reassign
		 * redo_fifo_start after the inode flush completes.
		 */
		if (ip) {
			redo->redo_objid = ip->obj_id;
			redo->redo_localization = ip->obj_localization;
			if ((ip->flags & HAMMER_INODE_RDIRTY) == 0) {
				ip->redo_fifo_start = next_offset;
				if (RB_INSERT(hammer_redo_rb_tree,
					      &hmp->rb_redo_root, ip)) {
					panic("hammer_generate_redo: "
					      "cannot insert inode %p on "
					      "redo FIFO", ip);
				}
				ip->flags |= HAMMER_INODE_RDIRTY;
			}
			if (ip->redo_fifo_next == 0)
				ip->redo_fifo_next = next_offset;
		} else {
			redo->redo_objid = 0;
			redo->redo_localization = 0;
		}

		/*
		 * Calculate the actual payload and recalculate the size
		 * of the media structure as necessary.  If no data buffer
		 * is supplied there is no payload.
		 */
		if (base == NULL) {
			n = 0;
		} else if (n > len) {
			n = len;
		}
		bytes = ((n + HAMMER_HEAD_ALIGN_MASK) &
			 ~HAMMER_HEAD_ALIGN_MASK) +
			(int)sizeof(struct hammer_fifo_redo) +
			(int)sizeof(struct hammer_fifo_tail);
		if (hammer_debug_general & 0x0080) {
			kprintf("redo %016llx %d %d\n",
				(long long)next_offset, bytes, n);
		}

		redo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
		redo->head.hdr_type = HAMMER_HEAD_TYPE_REDO;
		redo->head.hdr_size = bytes;
		redo->head.hdr_seq = hmp->undo_seqno++;
		redo->head.hdr_crc = 0;
		redo->redo_mtime = trans->time;
		redo->redo_offset = file_off;
		redo->redo_flags = flags;

		/*
		 * Incremental payload.  If no payload we throw the entire
		 * len into redo_data_bytes and will not loop.
		 */
		if (base) {
			redo->redo_data_bytes = n;
			bcopy(base, redo + 1, n);
			len -= n;
			base = (char *)base + n;
			file_off += n;
		} else {
			redo->redo_data_bytes = len;
			file_off += len;
			len = 0;
		}

		tail = (void *)((char *)redo + bytes - sizeof(*tail));
		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_REDO;
		tail->tail_size = bytes;

		KKASSERT(bytes >= sizeof(redo->head));
		redo->head.hdr_crc = crc32(redo, HAMMER_FIFO_HEAD_CRCOFF) ^
			crc32(&redo->head + 1, bytes - sizeof(redo->head));
		undomap->next_offset += bytes;
		hammer_stats_redo += bytes;

		/*
		 * Before we finish off the buffer we have to deal with any
		 * junk between the end of the media structure we just laid
		 * down and the UNDO alignment boundary.  We do this by laying
		 * down a dummy PAD.  Even though we will probably overwrite
		 * it almost immediately we have to do this so recovery runs
		 * can iterate the UNDO space without having to depend on
		 * the indices in the volume header.
		 *
		 * This dummy PAD will be overwritten on the next undo so
		 * we do not adjust undomap->next_offset.
		 */
		bytes = HAMMER_UNDO_ALIGN -
			((int)undomap->next_offset & HAMMER_UNDO_MASK);
		if (bytes != HAMMER_UNDO_ALIGN) {
			KKASSERT(bytes >= sizeof(struct hammer_fifo_tail));
			redo = (void *)(tail + 1);
			tail = (void *)((char *)redo + bytes - sizeof(*tail));
			if ((void *)redo != (void *)tail) {
				tail->tail_signature = HAMMER_TAIL_SIGNATURE;
				tail->tail_type = HAMMER_HEAD_TYPE_PAD;
				tail->tail_size = bytes;
			}
			redo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
			redo->head.hdr_type = HAMMER_HEAD_TYPE_PAD;
			redo->head.hdr_size = bytes;
			/* NO CRC OR SEQ NO */
		}
		hammer_modify_buffer_done(buffer);
		if (len == 0)
			break;
	}
	hammer_modify_volume_done(root_volume);
	hammer_unlock(&hmp->undo_lock);

	if (buffer)
		hammer_rel_buffer(buffer, 0);

	/*
	 * Make sure the nominal undo span contains at least one REDO_SYNC,
	 * otherwise the REDO recovery will not be triggered.
	 */
	if ((hmp->flags & HAMMER_MOUNT_REDO_SYNC) == 0 &&
	    flags != HAMMER_REDO_SYNC) {
		hammer_generate_redo_sync(trans);
	}

	return(error);
}
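/*
 * Illustrative sketch, not part of the original source: how a logical write
 * path might lay down a REDO record before queueing the actual data write,
 * using the hammer_generate_redo() signature above.  The HAMMER_REDO_WRITE
 * flag is assumed here; see hammer_vnops.c for the real call sites.
 */
static __inline void
example_redo_write(hammer_transaction_t trans, hammer_inode_t ip,
		   hammer_off_t file_off, void *data, int len)
{
	/*
	 * Log the logical write so a post-crash recovery can replay it
	 * after the UNDO pass has rolled partial meta-data writes back.
	 */
	hammer_generate_redo(trans, ip, file_off, HAMMER_REDO_WRITE,
			     data, len);
}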
/*
 * Delete snapshot transaction id(s) from the list of snapshots.
 */
static int
hammer_ioc_del_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_snapshot *snap)
{
	hammer_mount_t hmp = ip->hmp;
	struct hammer_cursor cursor;
	int error;

	/*
	 * Validate structure
	 */
	if (snap->count > HAMMER_SNAPS_PER_IOCTL)
		return (EINVAL);
	if (snap->index >= snap->count)
		return (EINVAL);

	hammer_lock_ex(&hmp->snapshot_lock);
again:
	/*
	 * Look for keys starting after the previous iteration, or at
	 * the beginning if snap->index is 0.
	 */
	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		return(error);
	}
	cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_SNAPSHOT;
	cursor.key_beg.localization = ip->obj_localization | HAMMER_LOCALIZE_INODE;
	cursor.asof = HAMMER_MAX_TID;
	cursor.flags |= HAMMER_CURSOR_ASOF;

	while (snap->index < snap->count) {
		cursor.key_beg.key = (int64_t)snap->snaps[snap->index].tid;
		error = hammer_btree_lookup(&cursor);
		if (error)
			break;
		error = hammer_btree_extract_leaf(&cursor);
		if (error)
			break;
		error = hammer_delete_at_cursor(&cursor, HAMMER_DELETE_DESTROY,
						0, 0, 0, NULL);
		if (error == EDEADLK) {
			hammer_done_cursor(&cursor);
			goto again;
		}
		if (error)
			break;
		++snap->index;
	}
	snap->head.error = error;
	hammer_done_cursor(&cursor);
	hammer_unlock(&hmp->snapshot_lock);
	return(0);
}
/*
 * Add snapshot transaction id(s) to the list of snapshots.
 *
 * NOTE: Records are created with an allocated TID.  If a flush cycle
 *	 is in progress the record may be synced in the current flush
 *	 cycle and the volume header will reflect the allocation of the
 *	 TID, but the synchronization point may not catch up to the
 *	 TID until the next flush cycle.
 */
static int
hammer_ioc_add_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_snapshot *snap)
{
	hammer_mount_t hmp = ip->hmp;
	struct hammer_btree_leaf_elm leaf;
	struct hammer_cursor cursor;
	int error;

	/*
	 * Validate structure
	 */
	if (snap->count > HAMMER_SNAPS_PER_IOCTL)
		return (EINVAL);
	if (snap->index >= snap->count)
		return (EINVAL);

	hammer_lock_ex(&hmp->snapshot_lock);
again:
	/*
	 * Look for keys starting after the previous iteration, or at
	 * the beginning if snap->index is 0.
	 */
	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		return(error);
	}
	cursor.asof = HAMMER_MAX_TID;
	cursor.flags |= HAMMER_CURSOR_BACKEND | HAMMER_CURSOR_ASOF;

	bzero(&leaf, sizeof(leaf));
	leaf.base.obj_id = HAMMER_OBJID_ROOT;
	leaf.base.rec_type = HAMMER_RECTYPE_SNAPSHOT;
	leaf.base.create_tid = hammer_alloc_tid(hmp, 1);
	leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
	leaf.base.localization = ip->obj_localization | HAMMER_LOCALIZE_INODE;
	leaf.data_len = sizeof(struct hammer_snapshot_data);

	while (snap->index < snap->count) {
		leaf.base.key = (int64_t)snap->snaps[snap->index].tid;
		cursor.key_beg = leaf.base;
		error = hammer_btree_lookup(&cursor);
		if (error == 0) {
			error = EEXIST;
			break;
		}

		/*
		 * NOTE: Must reload key_beg after an ASOF search because
		 *	 the create_tid may have been modified during the
		 *	 search.
		 */
		cursor.flags &= ~HAMMER_CURSOR_ASOF;
		cursor.key_beg = leaf.base;
		error = hammer_create_at_cursor(&cursor, &leaf,
						&snap->snaps[snap->index],
						HAMMER_CREATE_MODE_SYS);
		if (error == EDEADLK) {
			hammer_done_cursor(&cursor);
			goto again;
		}
		cursor.flags |= HAMMER_CURSOR_ASOF;
		if (error)
			break;
		++snap->index;
	}
	snap->head.error = error;
	hammer_done_cursor(&cursor);
	hammer_unlock(&hmp->snapshot_lock);
	return(0);
}
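/*
 * Illustrative sketch, not part of the original source: a userland caller
 * fills hammer_ioc_snapshot with up to HAMMER_SNAPS_PER_IOCTL TIDs; the
 * kernel advances snap->index as entries are processed and reports per-run
 * failures (e.g. the EEXIST case above) through snap->head.error rather
 * than the ioctl return value.  Assuming the HAMMERIOC_ADD_SNAPSHOT request:
 *
 *	struct hammer_ioc_snapshot snap;
 *
 *	bzero(&snap, sizeof(snap));
 *	snap.count = 1;
 *	snap.snaps[0].tid = tid;	// TID to record as a snapshot
 *	if (ioctl(fd, HAMMERIOC_ADD_SNAPSHOT, &snap) < 0 || snap.head.error)
 *		errx(1, "add-snapshot failed");
 */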
/*
 * Set version info
 */
static int
hammer_ioc_set_version(hammer_transaction_t trans, hammer_inode_t ip,
		       struct hammer_ioc_version *ver)
{
	hammer_mount_t hmp = trans->hmp;
	struct hammer_cursor cursor;
	hammer_volume_t volume;
	int error;
	int over = hmp->version;

	/*
	 * Generally do not allow downgrades.  However, version 4 can
	 * be downgraded to version 3.
	 */
	if (ver->cur_version < hmp->version) {
		if (!(ver->cur_version == 3 && hmp->version == 4))
			return(EINVAL);
	}
	if (ver->cur_version == hmp->version)
		return(0);
	if (ver->cur_version > HAMMER_VOL_VERSION_MAX)
		return(EINVAL);
	if (hmp->ronly)
		return(EROFS);

	/*
	 * Update the root volume header and the version cached in
	 * the hammer_mount structure.
	 */
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error)
		goto failed;
	hammer_lock_ex(&hmp->flusher.finalize_lock);
	hammer_sync_lock_ex(trans);
	hmp->version = ver->cur_version;

	/*
	 * If upgrading from version < 4 to version >= 4 the UNDO FIFO
	 * must be reinitialized.
	 */
	if (over < HAMMER_VOL_VERSION_FOUR &&
	    ver->cur_version >= HAMMER_VOL_VERSION_FOUR) {
		hkprintf("upgrade undo to version 4\n");
		error = hammer_upgrade_undo_4(trans);
		if (error)
			goto failed;
	}

	/*
	 * Adjust the version in the volume header
	 */
	volume = hammer_get_root_volume(hmp, &error);
	KKASSERT(error == 0);
	hammer_modify_volume_field(cursor.trans, volume, vol_version);
	volume->ondisk->vol_version = ver->cur_version;
	hammer_modify_volume_done(volume);
	hammer_rel_volume(volume, 0);

	hammer_sync_unlock(trans);
	hammer_unlock(&hmp->flusher.finalize_lock);
failed:
	ver->head.error = error;
	hammer_done_cursor(&cursor);
	return(0);
}
/*
 * Generate UNDO record(s) for the block of data at the specified zone1
 * or zone2 offset.
 *
 * The recovery code will execute UNDOs in reverse order, allowing overlaps.
 * All the UNDOs are executed together so if we already laid one down we
 * do not have to lay another one down for the same range.
 *
 * For HAMMER version 4+ UNDO a 512 byte boundary is enforced and a PAD
 * will be laid down for any unused space.  UNDO FIFO media structures
 * will implement the hdr_seq field (it used to be reserved01), and
 * both flush and recovery mechanics will be very different.
 *
 * WARNING!  See also hammer_generate_redo() in hammer_redo.c
 */
int
hammer_generate_undo(hammer_transaction_t trans, hammer_off_t zone_off,
		     void *base, int len)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t undomap;
	hammer_buffer_t buffer = NULL;
	hammer_fifo_undo_t undo;
	hammer_fifo_tail_t tail;
	hammer_off_t next_offset;
	int error;
	int bytes;
	int n;

	hmp = trans->hmp;

	/*
	 * A SYNC record may be required before we can lay down a general
	 * UNDO.  This ensures that the nominal recovery span contains
	 * at least one SYNC record telling the recovery code how far
	 * out-of-span it must go to run the REDOs.
	 */
	if ((hmp->flags & HAMMER_MOUNT_REDO_SYNC) == 0 &&
	    hmp->version >= HAMMER_VOL_VERSION_FOUR) {
		hammer_generate_redo_sync(trans);
	}

	/*
	 * Enter the offset into our undo history.  If there is an existing
	 * undo we do not have to generate a new one.
	 */
	if (hammer_enter_undo_history(hmp, zone_off, len) == EALREADY)
		return(0);

	root_volume = trans->rootvol;
	undomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];

	/* no undo recursion */
	hammer_modify_volume_noundo(NULL, root_volume);
	hammer_lock_ex(&hmp->undo_lock);

	/* undo had better not roll over (loose test) */
	if (hammer_undo_space(trans) < len + HAMMER_BUFSIZE*3)
		panic("hammer: insufficient undo FIFO space!");

	/*
	 * Loop until the undo for the entire range has been laid down.
	 */
	while (len) {
		/*
		 * Fetch the layout offset in the UNDO FIFO, wrap it as
		 * necessary.
		 */
		if (undomap->next_offset == undomap->alloc_offset) {
			undomap->next_offset =
				HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0);
		}
		next_offset = undomap->next_offset;

		/*
		 * This is a tail-chasing FIFO, when we hit the start of a new
		 * buffer we don't have to read it in.
		 */
		if ((next_offset & HAMMER_BUFMASK) == 0) {
			undo = hammer_bnew(hmp, next_offset, &error, &buffer);
			hammer_format_undo(undo, hmp->undo_seqno ^ 0x40000000);
		} else {
			undo = hammer_bread(hmp, next_offset, &error, &buffer);
		}
		if (error)
			break;

		/* no undo recursion */
		hammer_modify_buffer_noundo(NULL, buffer);

		/*
		 * Calculate how big a media structure fits up to the next
		 * alignment point and how large a data payload we can
		 * accommodate.
		 *
		 * If n calculates to 0 or negative there is no room for
		 * anything but a PAD.
		 */
		bytes = HAMMER_UNDO_ALIGN -
			((int)next_offset & HAMMER_UNDO_MASK);
		n = bytes -
		    (int)sizeof(struct hammer_fifo_undo) -
		    (int)sizeof(struct hammer_fifo_tail);

		/*
		 * If available space is insufficient for any payload
		 * we have to lay down a PAD.
		 *
		 * The minimum PAD is 8 bytes and the head and tail will
		 * overlap each other in that case.  PADs do not have
		 * sequence numbers or CRCs.
		 *
		 * A PAD may not start on a boundary.  That is, every
		 * 512-byte block in the UNDO/REDO FIFO must begin with
		 * a record containing a sequence number.
		 */
		if (n <= 0) {
			KKASSERT(bytes >= sizeof(struct hammer_fifo_tail));
			KKASSERT(((int)next_offset & HAMMER_UNDO_MASK) != 0);
			tail = (void *)((char *)undo + bytes - sizeof(*tail));
			if ((void *)undo != (void *)tail) {
				tail->tail_signature = HAMMER_TAIL_SIGNATURE;
				tail->tail_type = HAMMER_HEAD_TYPE_PAD;
				tail->tail_size = bytes;
			}
			undo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
			undo->head.hdr_type = HAMMER_HEAD_TYPE_PAD;
			undo->head.hdr_size = bytes;
			/* NO CRC OR SEQ NO */
			undomap->next_offset += bytes;
			hammer_modify_buffer_done(buffer);
			hammer_stats_undo += bytes;
			continue;
		}

		/*
		 * Calculate the actual payload and recalculate the size
		 * of the media structure as necessary.
		 */
		if (n > len) {
			n = len;
			bytes = ((n + HAMMER_HEAD_ALIGN_MASK) &
				 ~HAMMER_HEAD_ALIGN_MASK) +
				(int)sizeof(struct hammer_fifo_undo) +
				(int)sizeof(struct hammer_fifo_tail);
		}
		if (hammer_debug_general & 0x0080) {
			kprintf("undo %016llx %d %d\n",
				(long long)next_offset, bytes, n);
		}

		undo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
		undo->head.hdr_type = HAMMER_HEAD_TYPE_UNDO;
		undo->head.hdr_size = bytes;
		undo->head.hdr_seq = hmp->undo_seqno++;
		undo->head.hdr_crc = 0;
		undo->undo_offset = zone_off;
		undo->undo_data_bytes = n;
		bcopy(base, undo + 1, n);

		tail = (void *)((char *)undo + bytes - sizeof(*tail));
		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_UNDO;
		tail->tail_size = bytes;

		KKASSERT(bytes >= sizeof(undo->head));
		undo->head.hdr_crc = crc32(undo, HAMMER_FIFO_HEAD_CRCOFF) ^
			crc32(&undo->head + 1, bytes - sizeof(undo->head));
		undomap->next_offset += bytes;
		hammer_stats_undo += bytes;

		/*
		 * Before we finish off the buffer we have to deal with any
		 * junk between the end of the media structure we just laid
		 * down and the UNDO alignment boundary.  We do this by laying
		 * down a dummy PAD.  Even though we will probably overwrite
		 * it almost immediately we have to do this so recovery runs
		 * can iterate the UNDO space without having to depend on
		 * the indices in the volume header.
		 *
		 * This dummy PAD will be overwritten on the next undo so
		 * we do not adjust undomap->next_offset.
		 */
		bytes = HAMMER_UNDO_ALIGN -
			((int)undomap->next_offset & HAMMER_UNDO_MASK);
		if (bytes != HAMMER_UNDO_ALIGN) {
			KKASSERT(bytes >= sizeof(struct hammer_fifo_tail));
			undo = (void *)(tail + 1);
			tail = (void *)((char *)undo + bytes - sizeof(*tail));
			if ((void *)undo != (void *)tail) {
				tail->tail_signature = HAMMER_TAIL_SIGNATURE;
				tail->tail_type = HAMMER_HEAD_TYPE_PAD;
				tail->tail_size = bytes;
			}
			undo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
			undo->head.hdr_type = HAMMER_HEAD_TYPE_PAD;
			undo->head.hdr_size = bytes;
			/* NO CRC OR SEQ NO */
		}
		hammer_modify_buffer_done(buffer);

		/*
		 * Adjust for loop
		 */
		len -= n;
		base = (char *)base + n;
		zone_off += n;
	}
	hammer_modify_volume_done(root_volume);
	hammer_unlock(&hmp->undo_lock);

	if (buffer)
		hammer_rel_buffer(buffer, 0);
	return(error);
}
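/*
 * Illustrative sketch, not part of the original source: UNDO records carry
 * the pre-image of a media range, so the UNDO must be generated from the
 * buffer contents *before* they are overwritten (normally done for callers
 * by the hammer_modify_*() wrappers).  The hypothetical helper below only
 * shows that ordering.
 */
static __inline int
example_undo_then_modify(hammer_transaction_t trans, hammer_off_t zone_off,
			 void *media_ptr, void *new_data, int len)
{
	int error;

	/* Save the old contents into the UNDO FIFO first ... */
	error = hammer_generate_undo(trans, zone_off, media_ptr, len);

	/* ... then it is safe to overwrite the live media buffer. */
	if (error == 0)
		bcopy(new_data, media_ptr, len);
	return (error);
}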
/*
 * HAMMER version 4+ conversion support.
 *
 * Convert a HAMMER version < 4 UNDO FIFO area to a 4+ UNDO FIFO area.
 * The 4+ UNDO FIFO area is backwards compatible.  The conversion is
 * needed to initialize the sequence space and place headers on the
 * new 512-byte undo boundary.
 */
int
hammer_upgrade_undo_4(hammer_transaction_t trans)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t undomap;
	hammer_buffer_t buffer = NULL;
	hammer_fifo_head_t head;
	hammer_fifo_tail_t tail;
	hammer_off_t next_offset;
	u_int32_t seqno;
	int error;
	int bytes;

	hmp = trans->hmp;

	root_volume = trans->rootvol;

	/* no undo recursion */
	hammer_lock_ex(&hmp->undo_lock);
	hammer_modify_volume_noundo(NULL, root_volume);

	/*
	 * Adjust the in-core undomap and the on-disk undomap.
	 */
	next_offset = HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0);
	undomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
	undomap->next_offset = next_offset;
	undomap->first_offset = next_offset;

	undomap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	undomap->next_offset = next_offset;
	undomap->first_offset = next_offset;

	/*
	 * Loop over the entire UNDO space creating DUMMY entries.  Sequence
	 * numbers are assigned.
	 */
	seqno = 0;
	bytes = HAMMER_UNDO_ALIGN;

	while (next_offset != undomap->alloc_offset) {
		head = hammer_bnew(hmp, next_offset, &error, &buffer);
		if (error)
			break;
		hammer_modify_buffer_noundo(NULL, buffer);
		tail = (void *)((char *)head + bytes - sizeof(*tail));

		head->hdr_signature = HAMMER_HEAD_SIGNATURE;
		head->hdr_type = HAMMER_HEAD_TYPE_DUMMY;
		head->hdr_size = bytes;
		head->hdr_seq = seqno;
		head->hdr_crc = 0;

		tail = (void *)((char *)head + bytes - sizeof(*tail));
		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_DUMMY;
		tail->tail_size = bytes;

		head->hdr_crc = crc32(head, HAMMER_FIFO_HEAD_CRCOFF) ^
			crc32(head + 1, bytes - sizeof(*head));
		hammer_modify_buffer_done(buffer);

		hammer_stats_undo += bytes;
		next_offset += HAMMER_UNDO_ALIGN;
		++seqno;
	}

	/*
	 * The sequence number will be the next sequence number to lay down.
	 */
	hmp->undo_seqno = seqno;
	kprintf("version upgrade seqno start %08x\n", seqno);

	hammer_modify_volume_done(root_volume);
	hammer_unlock(&hmp->undo_lock);

	if (buffer)
		hammer_rel_buffer(buffer, 0);
	return (error);
}
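/*
 * Worked example, not part of the original source: with the 512-byte
 * HAMMER_UNDO_ALIGN spacing used above, a 128MB UNDO FIFO receives
 * 134217728 / 512 = 262144 DUMMY records during the upgrade, so the
 * "version upgrade seqno start" message prints 00040000 and the next
 * real UNDO/REDO record is laid down with that sequence number.
 */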
/*
 * Reblock a B-Tree internal node.  The parent must be adjusted to point to
 * the new copy of the internal node, and the node's children's parent
 * pointers must also be adjusted to point to the new copy.
 *
 * elm is a pointer to the parent element pointing at cursor.node.
 */
static int
hammer_reblock_int_node(struct hammer_ioc_reblock *reblock,
			 hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	struct hammer_node_lock lockroot;
	hammer_node_t onode;
	hammer_node_t nnode;
	int error;
	int i;

	hammer_node_lock_init(&lockroot, cursor->node);
	error = hammer_btree_lock_children(cursor, 1, &lockroot, NULL);
	if (error)
		goto done;

	onode = cursor->node;
	nnode = hammer_alloc_btree(cursor->trans, 0, &error);
	if (nnode == NULL)
		goto done;

	/*
	 * Move the node.  Adjust the parent's pointer to us first.
	 */
	hammer_lock_ex(&nnode->lock);
	hammer_modify_node_noundo(cursor->trans, nnode);
	bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk));

	if (elm) {
		/*
		 * We are not the root of the B-Tree
		 */
		hammer_modify_node(cursor->trans, cursor->parent,
				   &elm->internal.subtree_offset,
				   sizeof(elm->internal.subtree_offset));
		elm->internal.subtree_offset = nnode->node_offset;
		hammer_modify_node_done(cursor->parent);
	} else {
		/*
		 * We are the root of the B-Tree
		 */
		hammer_volume_t volume;

		volume = hammer_get_root_volume(cursor->trans->hmp, &error);
		KKASSERT(error == 0);

		hammer_modify_volume_field(cursor->trans, volume,
					   vol0_btree_root);
		volume->ondisk->vol0_btree_root = nnode->node_offset;
		hammer_modify_volume_done(volume);
		hammer_rel_volume(volume, 0);
	}

	/*
	 * Now adjust our children's pointers to us.
	 */
	for (i = 0; i < nnode->ondisk->count; ++i) {
		elm = &nnode->ondisk->elms[i];
		error = btree_set_parent(cursor->trans, nnode, elm);
		if (error)
			panic("reblock internal node: fixup problem");
	}

	/*
	 * Clean up.
	 *
	 * The new node replaces the current node in the cursor.  The cursor
	 * expects it to be locked so leave it locked.  Discard onode.
	 */
	hammer_cursor_replaced_node(onode, nnode);
	hammer_delete_node(cursor->trans, onode);

	if (hammer_debug_general & 0x4000) {
		kprintf("REBLOCK INODE %016llx -> %016llx\n",
			(long long)onode->node_offset,
			(long long)nnode->node_offset);
	}
	hammer_modify_node_done(nnode);
	cursor->node = nnode;

	hammer_unlock(&onode->lock);
	hammer_rel_node(onode);

done:
	hammer_btree_unlock_children(cursor->trans->hmp, &lockroot, NULL);
	return (error);
}
/*
 * Reblock a B-Tree leaf node.  The parent must be adjusted to point to
 * the new copy of the leaf node.
 *
 * elm is a pointer to the parent element pointing at cursor.node.
 */
static int
hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock,
			 hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	hammer_node_t onode;
	hammer_node_t nnode;
	int error;

	/*
	 * Don't supply a hint when allocating the leaf.  Fills are done
	 * from the leaf upwards.
	 */
	onode = cursor->node;
	nnode = hammer_alloc_btree(cursor->trans, 0, &error);

	if (nnode == NULL)
		return (error);

	/*
	 * Move the node
	 */
	hammer_lock_ex(&nnode->lock);
	hammer_modify_node_noundo(cursor->trans, nnode);
	bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk));

	if (elm) {
		/*
		 * We are not the root of the B-Tree
		 */
		hammer_modify_node(cursor->trans, cursor->parent,
				   &elm->internal.subtree_offset,
				   sizeof(elm->internal.subtree_offset));
		elm->internal.subtree_offset = nnode->node_offset;
		hammer_modify_node_done(cursor->parent);
	} else {
		/*
		 * We are the root of the B-Tree
		 */
		hammer_volume_t volume;

		volume = hammer_get_root_volume(cursor->trans->hmp, &error);
		KKASSERT(error == 0);

		hammer_modify_volume_field(cursor->trans, volume,
					   vol0_btree_root);
		volume->ondisk->vol0_btree_root = nnode->node_offset;
		hammer_modify_volume_done(volume);
		hammer_rel_volume(volume, 0);
	}

	hammer_cursor_replaced_node(onode, nnode);
	hammer_delete_node(cursor->trans, onode);

	if (hammer_debug_general & 0x4000) {
		kprintf("REBLOCK LNODE %016llx -> %016llx\n",
			(long long)onode->node_offset,
			(long long)nnode->node_offset);
	}
	hammer_modify_node_done(nnode);
	cursor->node = nnode;

	hammer_unlock(&onode->lock);
	hammer_rel_node(onode);

	return (error);
}
int
hammer_ioc_volume_add(hammer_transaction_t trans, hammer_inode_t ip,
		struct hammer_ioc_volume *ioc)
{
	struct hammer_mount *hmp = trans->hmp;
	struct mount *mp = hmp->mp;
	hammer_volume_t volume;
	int error;

	if (mp->mnt_flag & MNT_RDONLY) {
		kprintf("Cannot add volume to read-only HAMMER filesystem\n");
		return (EINVAL);
	}

	if (hmp->nvolumes + 1 >= HAMMER_MAX_VOLUMES) {
		kprintf("Max number of HAMMER volumes exceeded\n");
		return (EINVAL);
	}

	if (hammer_lock_ex_try(&hmp->volume_lock) != 0) {
		kprintf("Another volume operation is in progress!\n");
		return (EAGAIN);
	}

	/*
	 * Find an unused volume number.
	 */
	int free_vol_no = 0;
	while (free_vol_no < HAMMER_MAX_VOLUMES &&
	       RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, free_vol_no)) {
		++free_vol_no;
	}
	if (free_vol_no >= HAMMER_MAX_VOLUMES) {
		kprintf("Max number of HAMMER volumes exceeded\n");
		hammer_unlock(&hmp->volume_lock);
		return (EINVAL);
	}

	struct vnode *devvp = NULL;
	error = hammer_setup_device(&devvp, ioc->device_name, 0);
	if (error)
		goto end;
	KKASSERT(devvp);
	error = hammer_format_volume_header(
		hmp,
		devvp,
		hmp->rootvol->ondisk->vol_name,
		free_vol_no,
		hmp->nvolumes+1,
		ioc->vol_size,
		ioc->boot_area_size,
		ioc->mem_area_size);
	hammer_close_device(&devvp, 0);
	if (error)
		goto end;

	error = hammer_install_volume(hmp, ioc->device_name, NULL);
	if (error)
		goto end;

	hammer_sync_lock_sh(trans);
	hammer_lock_ex(&hmp->blkmap_lock);

	++hmp->nvolumes;

	/*
	 * Set each volume's new value of the vol_count field.
	 */
	for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
		volume = hammer_get_volume(hmp, vol_no, &error);
		if (volume == NULL && error == ENOENT) {
			/*
			 * Skip unused volume numbers
			 */
			error = 0;
			continue;
		}
		KKASSERT(volume != NULL && error == 0);
		hammer_modify_volume_field(trans, volume, vol_count);
		volume->ondisk->vol_count = hmp->nvolumes;
		hammer_modify_volume_done(volume);

		/*
		 * Only changes to the header of the root volume
		 * are automatically flushed to disk.  For all
		 * other volumes that we modify we do it here.
		 *
		 * No interlock is needed, volume buffers are not
		 * messed with by bioops.
		 */
		if (volume != trans->rootvol && volume->io.modified) {
			hammer_crc_set_volume(volume->ondisk);
			hammer_io_flush(&volume->io, 0);
		}

		hammer_rel_volume(volume, 0);
	}

	volume = hammer_get_volume(hmp, free_vol_no, &error);
	KKASSERT(volume != NULL && error == 0);

	struct bigblock_stat stat;
	error = hammer_format_freemap(trans, volume, &stat);
	KKASSERT(error == 0);

	/*
	 * Increase the total number of bigblocks and update stat/vstat totals.
	 */
	hammer_modify_volume_field(trans, trans->rootvol,
		vol0_stat_bigblocks);
	trans->rootvol->ondisk->vol0_stat_bigblocks += stat.total_bigblocks;
	hammer_modify_volume_done(trans->rootvol);

	/*
	 * Bigblock count changed so recompute the total number of blocks.
	 */
	mp->mnt_stat.f_blocks = trans->rootvol->ondisk->vol0_stat_bigblocks *
	    (HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE);
	mp->mnt_vstat.f_blocks = trans->rootvol->ondisk->vol0_stat_bigblocks *
	    (HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE);

	/*
	 * Increase the number of free bigblocks
	 * (including the copy in hmp)
	 */
	hammer_modify_volume_field(trans, trans->rootvol,
		vol0_stat_freebigblocks);
	trans->rootvol->ondisk->vol0_stat_freebigblocks +=
		stat.total_free_bigblocks;
	hmp->copy_stat_freebigblocks =
		trans->rootvol->ondisk->vol0_stat_freebigblocks;
	hammer_modify_volume_done(trans->rootvol);

	hammer_rel_volume(volume, 0);

	hammer_unlock(&hmp->blkmap_lock);
	hammer_sync_unlock(trans);

	KKASSERT(error == 0);
end:
	hammer_unlock(&hmp->volume_lock);
	if (error)
		kprintf("An error occurred: %d\n", error);
	return (error);
}
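/*
 * Worked example, not part of the original source: assuming the usual 8MB
 * HAMMER_LARGEBLOCK_SIZE and 16KB HAMMER_BUFSIZE, the factor above is
 * 8MB / 16KB = 512, i.e. f_blocks reports the filesystem size in 16KB
 * buffers.  A new volume contributing 1000 big-blocks therefore raises
 * f_blocks by 512000.
 */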
/*
 * Remove a volume.
 */
int
hammer_ioc_volume_del(hammer_transaction_t trans, hammer_inode_t ip,
		struct hammer_ioc_volume *ioc)
{
	struct hammer_mount *hmp = trans->hmp;
	struct mount *mp = hmp->mp;
	hammer_volume_t volume;
	int error = 0;

	if (mp->mnt_flag & MNT_RDONLY) {
		kprintf("Cannot del volume from read-only HAMMER filesystem\n");
		return (EINVAL);
	}

	if (hammer_lock_ex_try(&hmp->volume_lock) != 0) {
		kprintf("Another volume operation is in progress!\n");
		return (EAGAIN);
	}

	volume = NULL;

	/*
	 * find volume by volname
	 */
	for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
		volume = hammer_get_volume(hmp, vol_no, &error);
		if (volume == NULL && error == ENOENT) {
			/*
			 * Skip unused volume numbers
			 */
			error = 0;
			continue;
		}
		KKASSERT(volume != NULL && error == 0);
		if (strcmp(volume->vol_name, ioc->device_name) == 0) {
			break;
		}
		hammer_rel_volume(volume, 0);
		volume = NULL;
	}

	if (volume == NULL) {
		kprintf("Couldn't find volume\n");
		error = EINVAL;
		goto end;
	}

	if (volume == trans->rootvol) {
		kprintf("Cannot remove root-volume\n");
		hammer_rel_volume(volume, 0);
		error = EINVAL;
		goto end;
	}

	/*
	 * Reblock the filesystem to migrate data off the volume being
	 * removed.
	 */
	hmp->volume_to_remove = volume->vol_no;

	struct hammer_ioc_reblock reblock;
	bzero(&reblock, sizeof(reblock));

	reblock.key_beg.localization = HAMMER_MIN_LOCALIZATION;
	reblock.key_beg.obj_id = HAMMER_MIN_OBJID;
	reblock.key_end.localization = HAMMER_MAX_LOCALIZATION;
	reblock.key_end.obj_id = HAMMER_MAX_OBJID;
	reblock.head.flags = HAMMER_IOC_DO_FLAGS;
	reblock.free_level = 0;

	error = hammer_ioc_reblock(trans, ip, &reblock);

	if (reblock.head.flags & HAMMER_IOC_HEAD_INTR) {
		error = EINTR;
	}

	if (error) {
		if (error == EINTR) {
			kprintf("reblock was interrupted\n");
		} else {
			kprintf("reblock failed: %d\n", error);
		}
		hmp->volume_to_remove = -1;
		hammer_rel_volume(volume, 0);
		goto end;
	}

	/*
	 * Sync filesystem
	 */
	int count = 0;
	while (hammer_flusher_haswork(hmp)) {
		hammer_flusher_sync(hmp);
		++count;
		if (count >= 5) {
			if (count == 5)
				kprintf("HAMMER: flushing.");
			else
				kprintf(".");
			tsleep(&count, 0, "hmrufl", hz);
		}
		if (count == 30) {
			kprintf("giving up");
			break;
		}
	}
	kprintf("\n");

	hammer_sync_lock_sh(trans);
	hammer_lock_ex(&hmp->blkmap_lock);

	/*
	 * We use stat later to update rootvol's bigblock stats
	 */
	struct bigblock_stat stat;
	error = hammer_free_freemap(trans, volume, &stat);
	if (error) {
		kprintf("Failed to free volume. Volume not empty!\n");
		hmp->volume_to_remove = -1;
		hammer_rel_volume(volume, 0);
		hammer_unlock(&hmp->blkmap_lock);
		hammer_sync_unlock(trans);
		goto end;
	}

	hmp->volume_to_remove = -1;

	hammer_rel_volume(volume, 0);

	/*
	 * Unload buffers
	 */
	RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
		hammer_unload_buffer, volume);

	error = hammer_unload_volume(volume, NULL);
	if (error == -1) {
		kprintf("Failed to unload volume\n");
		hammer_unlock(&hmp->blkmap_lock);
		hammer_sync_unlock(trans);
		goto end;
	}

	volume = NULL;
	--hmp->nvolumes;

	/*
	 * Set each volume's new value of the vol_count field.
	 */
	for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
		volume = hammer_get_volume(hmp, vol_no, &error);
		if (volume == NULL && error == ENOENT) {
			/*
			 * Skip unused volume numbers
			 */
			error = 0;
			continue;
		}
		KKASSERT(volume != NULL && error == 0);
		hammer_modify_volume_field(trans, volume, vol_count);
		volume->ondisk->vol_count = hmp->nvolumes;
		hammer_modify_volume_done(volume);

		/*
		 * Only changes to the header of the root volume
		 * are automatically flushed to disk.  For all
		 * other volumes that we modify we do it here.
		 *
		 * No interlock is needed, volume buffers are not
		 * messed with by bioops.
		 */
		if (volume != trans->rootvol && volume->io.modified) {
			hammer_crc_set_volume(volume->ondisk);
			hammer_io_flush(&volume->io, 0);
		}

		hammer_rel_volume(volume, 0);
	}

	/*
	 * Update the total number of bigblocks
	 */
	hammer_modify_volume_field(trans, trans->rootvol,
		vol0_stat_bigblocks);
	trans->rootvol->ondisk->vol0_stat_bigblocks -= stat.total_bigblocks;
	hammer_modify_volume_done(trans->rootvol);

	/*
	 * Update the number of free bigblocks
	 * (including the copy in hmp)
	 */
	hammer_modify_volume_field(trans, trans->rootvol,
		vol0_stat_freebigblocks);
	trans->rootvol->ondisk->vol0_stat_freebigblocks -=
		stat.total_free_bigblocks;
	hmp->copy_stat_freebigblocks =
		trans->rootvol->ondisk->vol0_stat_freebigblocks;
	hammer_modify_volume_done(trans->rootvol);

	/*
	 * Bigblock count changed so recompute the total number of blocks.
	 */
	mp->mnt_stat.f_blocks = trans->rootvol->ondisk->vol0_stat_bigblocks *
	    (HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE);
	mp->mnt_vstat.f_blocks = trans->rootvol->ondisk->vol0_stat_bigblocks *
	    (HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE);

	hammer_unlock(&hmp->blkmap_lock);
	hammer_sync_unlock(trans);

	/*
	 * Erase the volume header of the removed device.
	 *
	 * This is to not accidentally mount the volume again.
	 */
	struct vnode *devvp = NULL;
	error = hammer_setup_device(&devvp, ioc->device_name, 0);
	if (error) {
		kprintf("Failed to open device: %s\n", ioc->device_name);
		goto end;
	}
	KKASSERT(devvp);
	error = hammer_clear_volume_header(devvp);
	if (error) {
		kprintf("Failed to clear volume header of device: %s\n",
			ioc->device_name);
		goto end;
	}
	hammer_close_device(&devvp, 0);

	KKASSERT(error == 0);
end:
	hammer_unlock(&hmp->volume_lock);
	return (error);
}
void
hammer_sync_unlock(hammer_transaction_t trans)
{
	--trans->sync_lock_refs;
	hammer_unlock(&trans->hmp->sync_lock);
}
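/*
 * Illustrative note, not part of the original source: hammer_sync_unlock()
 * pairs with hammer_sync_lock_sh()/hammer_sync_lock_ex(), which bump
 * sync_lock_refs and take the shared or exclusive side of hmp->sync_lock.
 * The volume ioctls above use the shared form around media updates:
 *
 *	hammer_sync_lock_sh(trans);
 *	hammer_lock_ex(&hmp->blkmap_lock);
 *	... modify freemap and volume headers ...
 *	hammer_unlock(&hmp->blkmap_lock);
 *	hammer_sync_unlock(trans);
 */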