/*
 * Relocate the contents of onode into the freshly allocated nnode and
 * repoint every reference to the old node at the new one.
 *
 * elm is the element in cursor->parent that points at the node being
 * moved; its subtree_offset is rewritten to reference nnode.  A NULL elm
 * means the node is the root of the B-Tree, in which case the root
 * volume's vol0_btree_root is updated instead.
 */
static void
hammer_move_node(hammer_cursor_t cursor, hammer_btree_elm_t elm,
		hammer_node_t onode, hammer_node_t nnode)
{
	int error;
	int idx;

	/* Clone the on-disk image before touching any pointers. */
	bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk));

	/*
	 * Fix up whatever points down at us: either the root volume
	 * header or the parent's element.
	 */
	if (elm == NULL) {
		/* Root of the B-Tree: the volume header holds the pointer. */
		hammer_volume_t volume;

		volume = hammer_get_root_volume(cursor->trans->hmp, &error);
		KKASSERT(error == 0);

		hammer_modify_volume_field(cursor->trans, volume,
					   vol0_btree_root);
		volume->ondisk->vol0_btree_root = nnode->node_offset;
		hammer_modify_volume_done(volume);
		hammer_rel_volume(volume, 0);
	} else {
		/* Interior position: the parent element holds the pointer. */
		KKASSERT(hammer_is_internal_node_elm(elm));
		hammer_modify_node(cursor->trans, cursor->parent,
				   &elm->internal.subtree_offset,
				   sizeof(elm->internal.subtree_offset));
		elm->internal.subtree_offset = nnode->node_offset;
		hammer_modify_node_done(cursor->parent);
	}

	/* Only internal nodes have children whose parent must be fixed. */
	if (nnode->ondisk->type != HAMMER_BTREE_TYPE_INTERNAL)
		return;

	idx = 0;
	while (idx < nnode->ondisk->count) {
		error = btree_set_parent_of_child(cursor->trans, nnode,
						  &nnode->ondisk->elms[idx]);
		if (error)
			hpanic("reblock internal node: fixup problem");
		++idx;
	}
}
/*
 * Write out a new record.
 *
 * Inserts the leaf described by mrec into the B-Tree at the position
 * described by cursor.  If the leaf carries data (non-zero data_len and
 * data_offset) local storage is allocated for it, the payload is copied
 * in from the user buffer udata, CRC-checked and localized, and
 * mrec->leaf.data_offset is rewritten to the local allocation.
 *
 * The sync lock is held shared for the whole operation and is released
 * on every exit path.
 *
 * Returns 0 on success or an errno.  The insertion can fail with
 * EDEADLK, or with EALREADY when the record already exists.  On failure
 * any data space allocated here is freed again.
 */
static int
hammer_mirror_write(hammer_cursor_t cursor,
		    struct hammer_ioc_mrecord_rec *mrec,
		    char *udata)
{
	hammer_transaction_t trans;
	hammer_buffer_t data_buffer;
	hammer_off_t ndata_offset;
	hammer_tid_t high_tid;
	void *ndata;
	int error;
	int doprop;

	trans = cursor->trans;
	data_buffer = NULL;
	doprop = 0;

	/*
	 * Get the sync lock so the whole mess is atomic
	 */
	hammer_sync_lock_sh(trans);

	/*
	 * Allocate and adjust data
	 */
	if (mrec->leaf.data_len && mrec->leaf.data_offset) {
		ndata = hammer_alloc_data(trans, mrec->leaf.data_len,
					  mrec->leaf.base.rec_type,
					  &ndata_offset, &data_buffer,
					  0, &error);
		if (ndata == NULL) {
			/*
			 * Nothing was allocated.  Do not go through the
			 * 'failed' cleanup: mrec->leaf.data_offset has not
			 * been replaced by a local allocation yet and must
			 * not be handed to hammer_blockmap_free().  We still
			 * have to drop the sync lock taken above.
			 */
			hammer_sync_unlock(trans);
			if (data_buffer)
				hammer_rel_buffer(data_buffer, 0);
			return(error);
		}
		mrec->leaf.data_offset = ndata_offset;
		hammer_modify_buffer(trans, data_buffer, NULL, 0);
		error = copyin(udata, ndata, mrec->leaf.data_len);
		if (error == 0) {
			if (hammer_crc_test_leaf(ndata, &mrec->leaf) == 0) {
				kprintf("data crc mismatch on pipe\n");
				error = EINVAL;
			} else {
				error = hammer_mirror_localize_data(
							ndata, &mrec->leaf);
			}
		}
		hammer_modify_buffer_done(data_buffer);
	} else {
		mrec->leaf.data_offset = 0;
		error = 0;
		ndata = NULL;
	}
	if (error)
		goto failed;

	/*
	 * Do the insertion.  This can fail with a EDEADLK or EALREADY
	 */
	cursor->flags |= HAMMER_CURSOR_INSERT;
	error = hammer_btree_lookup(cursor);
	if (error != ENOENT) {
		if (error == 0)
			error = EALREADY;
		goto failed;
	}

	error = hammer_btree_insert(cursor, &mrec->leaf, &doprop);

	/*
	 * Cursor is left on the current element, we want to skip it now.
	 */
	cursor->flags |= HAMMER_CURSOR_ATEDISK;
	cursor->flags &= ~HAMMER_CURSOR_INSERT;

	/*
	 * Track a count of active inodes.
	 */
	if (error == 0 &&
	    mrec->leaf.base.rec_type == HAMMER_RECTYPE_INODE &&
	    mrec->leaf.base.delete_tid == 0) {
		hammer_modify_volume_field(trans,
					   trans->rootvol,
					   vol0_stat_inodes);
		++trans->hmp->rootvol->ondisk->vol0_stat_inodes;
		hammer_modify_volume_done(trans->rootvol);
	}

	/*
	 * vol0_next_tid must track the highest TID stored in the filesystem.
	 * We do not need to generate undo for this update.
	 */
	high_tid = mrec->leaf.base.create_tid;
	if (high_tid < mrec->leaf.base.delete_tid)
		high_tid = mrec->leaf.base.delete_tid;
	if (trans->rootvol->ondisk->vol0_next_tid < high_tid) {
		hammer_modify_volume(trans, trans->rootvol, NULL, 0);
		trans->rootvol->ondisk->vol0_next_tid = high_tid;
		hammer_modify_volume_done(trans->rootvol);
	}

	/*
	 * WARNING!  cursor's leaf pointer may have changed after
	 *	     do_propagation returns.
	 */
	if (error == 0 && doprop)
		hammer_btree_do_propagation(cursor, NULL, &mrec->leaf);

failed:
	/*
	 * Cleanup.  By the time we get here data_offset is either 0 or a
	 * local allocation made above, so it is safe to free on error.
	 */
	if (error && mrec->leaf.data_offset) {
		hammer_blockmap_free(cursor->trans,
				     mrec->leaf.data_offset,
				     mrec->leaf.data_len);
	}
	hammer_sync_unlock(trans);
	if (data_buffer)
		hammer_rel_buffer(data_buffer, 0);
	return(error);
}
/*
 * HAMMER version 4+ REDO support.
 *
 * REDO records are used to improve fsync() performance.  Instead of having
 * to go through a complete double-flush cycle involving at least two disk
 * synchronizations the fsync need only flush UNDO/REDO FIFO buffers through
 * the related REDO records, which is a single synchronization requiring
 * no track seeking.  If a recovery becomes necessary the recovery code
 * will generate logical data writes based on the REDO records encountered.
 * That is, the recovery code will UNDO any partial meta-data/data writes
 * at the raw disk block level and then REDO the data writes at the logical
 * level.
 *
 * Lays down one or more REDO records in the UNDO/REDO FIFO:
 *
 *	trans	 - supplies the mount (hmp), the root volume and the
 *		   timestamp stored in redo_mtime.
 *	ip	 - optional inode.  When non-NULL its obj_id and
 *		   obj_localization are stored in each record and the
 *		   inode is entered into hmp->rb_redo_root the first time
 *		   (HAMMER_INODE_RDIRTY not yet set).  When NULL both
 *		   fields are written as 0.
 *	file_off - stored in redo_offset; advanced as records are written.
 *	flags	 - stored verbatim in redo_flags.
 *	base/len - optional payload.  With a non-NULL base the data is
 *		   copied into as many records as needed.  With a NULL
 *		   base no data is copied and a single record is written
 *		   whose redo_data_bytes is set to len.
 *
 * Returns the error reported through hammer_bnew()/hammer_bread() for the
 * last FIFO buffer accessed.  Panics if the loose FIFO space check fails.
 */
int
hammer_generate_redo(hammer_transaction_t trans, hammer_inode_t ip,
		     hammer_off_t file_off, u_int32_t flags,
		     void *base, int len)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t undomap;
	hammer_buffer_t buffer = NULL;
	hammer_fifo_redo_t redo;
	hammer_fifo_tail_t tail;
	hammer_off_t next_offset;
	int error;
	int bytes;
	int n;

	/*
	 * Setup
	 */
	hmp = trans->hmp;

	root_volume = trans->rootvol;
	undomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];

	/*
	 * No undo recursion when modifying the root volume
	 */
	hammer_modify_volume(NULL, root_volume, NULL, 0);
	hammer_lock_ex(&hmp->undo_lock);

	/* undo had better not roll over (loose test) */
	if (hammer_undo_space(trans) < len + HAMMER_BUFSIZE*3)
		panic("hammer: insufficient undo FIFO space!");

	/*
	 * Loop until the undo for the entire range has been laid down.
	 * Loop at least once (len might be 0 as a degenerate case).
	 */
	for (;;) {
		/*
		 * Fetch the layout offset in the UNDO FIFO, wrap it as
		 * necessary.
		 */
		if (undomap->next_offset == undomap->alloc_offset) {
			undomap->next_offset =
				HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0);
		}
		next_offset = undomap->next_offset;

		/*
		 * This is a tail-chasing FIFO, when we hit the start of a new
		 * buffer we don't have to read it in.
		 */
		if ((next_offset & HAMMER_BUFMASK) == 0) {
			redo = hammer_bnew(hmp, next_offset, &error, &buffer);
			hammer_format_undo(redo, hmp->undo_seqno ^ 0x40000000);
		} else {
			redo = hammer_bread(hmp, next_offset, &error, &buffer);
		}
		if (error)
			break;
		hammer_modify_buffer(NULL, buffer, NULL, 0);

		/*
		 * Calculate how big a media structure fits up to the next
		 * alignment point and how large a data payload we can
		 * accommodate.
		 *
		 * If n calculates to 0 or negative there is no room for
		 * anything but a PAD.
		 */
		bytes = HAMMER_UNDO_ALIGN -
			((int)next_offset & HAMMER_UNDO_MASK);
		n = bytes -
		    (int)sizeof(struct hammer_fifo_redo) -
		    (int)sizeof(struct hammer_fifo_tail);

		/*
		 * If available space is insufficient for any payload
		 * we have to lay down a PAD.
		 *
		 * The minimum PAD is 8 bytes and the head and tail will
		 * overlap each other in that case.  PADs do not have
		 * sequence numbers or CRCs.
		 *
		 * A PAD may not start on a boundary.  That is, every
		 * 512-byte block in the UNDO/REDO FIFO must begin with
		 * a record containing a sequence number.
		 */
		if (n <= 0) {
			KKASSERT(bytes >= sizeof(struct hammer_fifo_tail));
			KKASSERT(((int)next_offset & HAMMER_UNDO_MASK) != 0);
			tail = (void *)((char *)redo + bytes - sizeof(*tail));
			/* Skip the tail when it would overlay the head. */
			if ((void *)redo != (void *)tail) {
				tail->tail_signature = HAMMER_TAIL_SIGNATURE;
				tail->tail_type = HAMMER_HEAD_TYPE_PAD;
				tail->tail_size = bytes;
			}
			redo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
			redo->head.hdr_type = HAMMER_HEAD_TYPE_PAD;
			redo->head.hdr_size = bytes;
			/* NO CRC OR SEQ NO */
			undomap->next_offset += bytes;
			hammer_modify_buffer_done(buffer);
			hammer_stats_redo += bytes;
			continue;
		}

		/*
		 * When generating an inode-related REDO record we track
		 * the point in the UNDO/REDO FIFO containing the inode's
		 * earliest REDO record.  See hammer_generate_redo_sync().
		 *
		 * redo_fifo_next is cleared when an inode is staged to
		 * the backend and then used to determine how to reassign
		 * redo_fifo_start after the inode flush completes.
		 */
		if (ip) {
			redo->redo_objid = ip->obj_id;
			redo->redo_localization = ip->obj_localization;
			if ((ip->flags & HAMMER_INODE_RDIRTY) == 0) {
				ip->redo_fifo_start = next_offset;
				if (RB_INSERT(hammer_redo_rb_tree,
					      &hmp->rb_redo_root, ip)) {
					panic("hammer_generate_redo: "
					      "cannot insert inode %p on "
					      "redo FIFO", ip);
				}
				ip->flags |= HAMMER_INODE_RDIRTY;
			}
			if (ip->redo_fifo_next == 0)
				ip->redo_fifo_next = next_offset;
		} else {
			redo->redo_objid = 0;
			redo->redo_localization = 0;
		}

		/*
		 * Calculate the actual payload and recalculate the size
		 * of the media structure as necessary.  If no data buffer
		 * is supplied there is no payload.
		 */
		if (base == NULL) {
			n = 0;
		} else if (n > len) {
			n = len;
		}
		bytes = ((n + HAMMER_HEAD_ALIGN_MASK) &
			 ~HAMMER_HEAD_ALIGN_MASK) +
			(int)sizeof(struct hammer_fifo_redo) +
			(int)sizeof(struct hammer_fifo_tail);
		if (hammer_debug_general & 0x0080) {
			kprintf("redo %016llx %d %d\n",
				(long long)next_offset, bytes, n);
		}

		redo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
		redo->head.hdr_type = HAMMER_HEAD_TYPE_REDO;
		redo->head.hdr_size = bytes;
		redo->head.hdr_seq = hmp->undo_seqno++;
		/* The CRC field is zeroed here and filled in further below. */
		redo->head.hdr_crc = 0;
		redo->redo_mtime = trans->time;
		redo->redo_offset = file_off;
		redo->redo_flags = flags;

		/*
		 * Incremental payload.  If no payload we throw the entire
		 * len into redo_data_bytes and will not loop.
		 */
		if (base) {
			redo->redo_data_bytes = n;
			bcopy(base, redo + 1, n);
			len -= n;
			base = (char *)base + n;
			file_off += n;
		} else {
			redo->redo_data_bytes = len;
			file_off += len;
			len = 0;
		}

		tail = (void *)((char *)redo + bytes - sizeof(*tail));
		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_REDO;
		tail->tail_size = bytes;

		KKASSERT(bytes >= sizeof(redo->head));
		/*
		 * The CRC combines the first HAMMER_FIFO_HEAD_CRCOFF bytes
		 * of the record with everything following the head.
		 */
		redo->head.hdr_crc = crc32(redo, HAMMER_FIFO_HEAD_CRCOFF) ^
			     crc32(&redo->head + 1, bytes - sizeof(redo->head));
		undomap->next_offset += bytes;
		hammer_stats_redo += bytes;

		/*
		 * Before we finish off the buffer we have to deal with any
		 * junk between the end of the media structure we just laid
		 * down and the UNDO alignment boundary.  We do this by laying
		 * down a dummy PAD.  Even though we will probably overwrite
		 * it almost immediately we have to do this so recovery runs
		 * can iterate the UNDO space without having to depend on
		 * the indices in the volume header.
		 *
		 * This dummy PAD will be overwritten on the next undo so
		 * we do not adjust undomap->next_offset.
		 */
		bytes = HAMMER_UNDO_ALIGN -
			((int)undomap->next_offset & HAMMER_UNDO_MASK);
		if (bytes != HAMMER_UNDO_ALIGN) {
			KKASSERT(bytes >= sizeof(struct hammer_fifo_tail));
			/* The PAD starts right after the tail just written. */
			redo = (void *)(tail + 1);
			tail = (void *)((char *)redo + bytes - sizeof(*tail));
			if ((void *)redo != (void *)tail) {
				tail->tail_signature = HAMMER_TAIL_SIGNATURE;
				tail->tail_type = HAMMER_HEAD_TYPE_PAD;
				tail->tail_size = bytes;
			}
			redo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
			redo->head.hdr_type = HAMMER_HEAD_TYPE_PAD;
			redo->head.hdr_size = bytes;
			/* NO CRC OR SEQ NO */
		}
		hammer_modify_buffer_done(buffer);
		if (len == 0)
			break;
	}
	hammer_modify_volume_done(root_volume);
	hammer_unlock(&hmp->undo_lock);

	if (buffer)
		hammer_rel_buffer(buffer, 0);

	/*
	 * Make sure the nominal undo span contains at least one REDO_SYNC,
	 * otherwise the REDO recovery will not be triggered.
	 */
	if ((hmp->flags & HAMMER_MOUNT_REDO_SYNC) == 0 &&
	    flags != HAMMER_REDO_SYNC) {
		hammer_generate_redo_sync(trans);
	}

	return(error);
}
/*
 * Set version info
 *
 * Changes the filesystem version to ver->cur_version, updating both the
 * version cached in the hammer_mount and the vol_version field of the
 * root volume header.  Upgrading across HAMMER_VOL_VERSION_FOUR also
 * reinitializes the UNDO FIFO.
 *
 * Returns EINVAL for a disallowed downgrade or a version above
 * HAMMER_VOL_VERSION_MAX, EROFS on a read-only mount, and 0 when the
 * requested version is already in effect.  Otherwise returns 0 and
 * reports the outcome of the update in ver->head.error.
 *
 * The finalize lock and the sync lock are held across the update and
 * are released on all paths, including a failed UNDO FIFO upgrade.
 */
static int
hammer_ioc_set_version(hammer_transaction_t trans, hammer_inode_t ip,
		       struct hammer_ioc_version *ver)
{
	hammer_mount_t hmp = trans->hmp;
	struct hammer_cursor cursor;
	hammer_volume_t volume;
	int error;
	int over = hmp->version;	/* version before the change */

	/*
	 * Generally do not allow downgrades.  However, version 4 can
	 * be downgraded to version 3.
	 */
	if (ver->cur_version < hmp->version) {
		if (!(ver->cur_version == 3 && hmp->version == 4))
			return(EINVAL);
	}
	if (ver->cur_version == hmp->version)
		return(0);
	if (ver->cur_version > HAMMER_VOL_VERSION_MAX)
		return(EINVAL);
	if (hmp->ronly)
		return(EROFS);

	/*
	 * Update the root volume header and the version cached in
	 * the hammer_mount structure.
	 */
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error)
		goto failed;
	hammer_lock_ex(&hmp->flusher.finalize_lock);
	hammer_sync_lock_ex(trans);
	hmp->version = ver->cur_version;

	/*
	 * If upgrading from version < 4 to version >= 4 the UNDO FIFO
	 * must be reinitialized.
	 */
	if (over < HAMMER_VOL_VERSION_FOUR &&
	    ver->cur_version >= HAMMER_VOL_VERSION_FOUR) {
		hkprintf("upgrade undo to version 4\n");
		error = hammer_upgrade_undo_4(trans);
		if (error) {
			/*
			 * The volume header was not updated, so put the
			 * cached version back to keep it in agreement
			 * with the header, then drop the locks below.
			 */
			hmp->version = over;
			goto unlock;
		}
	}

	/*
	 * Adjust the version in the volume header
	 */
	volume = hammer_get_root_volume(hmp, &error);
	KKASSERT(error == 0);
	hammer_modify_volume_field(cursor.trans, volume, vol_version);
	volume->ondisk->vol_version = ver->cur_version;
	hammer_modify_volume_done(volume);
	hammer_rel_volume(volume, 0);

unlock:
	hammer_sync_unlock(trans);
	hammer_unlock(&hmp->flusher.finalize_lock);
failed:
	ver->head.error = error;
	hammer_done_cursor(&cursor);
	return(0);
}
/*
 * Generate UNDO record(s) for the block of data at the specified zone1
 * or zone2 offset.
 *
 * The recovery code will execute UNDOs in reverse order, allowing overlaps.
 * All the UNDOs are executed together so if we already laid one down we
 * do not have to lay another one down for the same range.
 *
 * For HAMMER version 4+ UNDO a 512 byte boundary is enforced and a PAD
 * will be laid down for any unused space.  UNDO FIFO media structures
 * will implement the hdr_seq field (it used to be reserved01), and
 * both flush and recovery mechanics will be very different.
 *
 * WARNING!  See also hammer_generate_redo() in hammer_redo.c
 *
 *	trans	 - supplies the mount (hmp) and the root volume.
 *	zone_off - offset of the data being protected; stored in
 *		   undo_offset and advanced as records are written.
 *	base/len - the current contents of the range, copied into the
 *		   UNDO record(s).
 *
 * Returns 0 if the range is already covered by the undo history or if
 * len is 0, otherwise the error reported through hammer_bnew()/
 * hammer_bread() for the last FIFO buffer accessed.  Panics if the loose
 * FIFO space check fails.
 */
int
hammer_generate_undo(hammer_transaction_t trans,
		     hammer_off_t zone_off, void *base, int len)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t undomap;
	hammer_buffer_t buffer = NULL;
	hammer_fifo_undo_t undo;
	hammer_fifo_tail_t tail;
	hammer_off_t next_offset;
	int error = 0;	/* the loop below does not run when len == 0 */
	int bytes;
	int n;

	hmp = trans->hmp;

	/*
	 * A SYNC record may be required before we can lay down a general
	 * UNDO.  This ensures that the nominal recovery span contains
	 * at least one SYNC record telling the recovery code how far
	 * out-of-span it must go to run the REDOs.
	 */
	if ((hmp->flags & HAMMER_MOUNT_REDO_SYNC) == 0 &&
	    hmp->version >= HAMMER_VOL_VERSION_FOUR) {
		hammer_generate_redo_sync(trans);
	}

	/*
	 * Enter the offset into our undo history.  If there is an existing
	 * undo we do not have to generate a new one.
	 */
	if (hammer_enter_undo_history(hmp, zone_off, len) == EALREADY)
		return(0);

	root_volume = trans->rootvol;
	undomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];

	/* no undo recursion */
	hammer_modify_volume_noundo(NULL, root_volume);
	hammer_lock_ex(&hmp->undo_lock);

	/* undo had better not roll over (loose test) */
	if (hammer_undo_space(trans) < len + HAMMER_BUFSIZE*3)
		panic("hammer: insufficient undo FIFO space!");

	/*
	 * Loop until the undo for the entire range has been laid down.
	 */
	while (len) {
		/*
		 * Fetch the layout offset in the UNDO FIFO, wrap it as
		 * necessary.
		 */
		if (undomap->next_offset == undomap->alloc_offset) {
			undomap->next_offset =
				HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0);
		}
		next_offset = undomap->next_offset;

		/*
		 * This is a tail-chasing FIFO, when we hit the start of a new
		 * buffer we don't have to read it in.
		 */
		if ((next_offset & HAMMER_BUFMASK) == 0) {
			undo = hammer_bnew(hmp, next_offset, &error, &buffer);
			hammer_format_undo(undo, hmp->undo_seqno ^ 0x40000000);
		} else {
			undo = hammer_bread(hmp, next_offset, &error, &buffer);
		}
		if (error)
			break;
		/* no undo recursion */
		hammer_modify_buffer_noundo(NULL, buffer);

		/*
		 * Calculate how big a media structure fits up to the next
		 * alignment point and how large a data payload we can
		 * accommodate.
		 *
		 * If n calculates to 0 or negative there is no room for
		 * anything but a PAD.
		 */
		bytes = HAMMER_UNDO_ALIGN -
			((int)next_offset & HAMMER_UNDO_MASK);
		n = bytes -
		    (int)sizeof(struct hammer_fifo_undo) -
		    (int)sizeof(struct hammer_fifo_tail);

		/*
		 * If available space is insufficient for any payload
		 * we have to lay down a PAD.
		 *
		 * The minimum PAD is 8 bytes and the head and tail will
		 * overlap each other in that case.  PADs do not have
		 * sequence numbers or CRCs.
		 *
		 * A PAD may not start on a boundary.  That is, every
		 * 512-byte block in the UNDO/REDO FIFO must begin with
		 * a record containing a sequence number.
		 */
		if (n <= 0) {
			KKASSERT(bytes >= sizeof(struct hammer_fifo_tail));
			KKASSERT(((int)next_offset & HAMMER_UNDO_MASK) != 0);
			tail = (void *)((char *)undo + bytes - sizeof(*tail));
			/* Skip the tail when it would overlay the head. */
			if ((void *)undo != (void *)tail) {
				tail->tail_signature = HAMMER_TAIL_SIGNATURE;
				tail->tail_type = HAMMER_HEAD_TYPE_PAD;
				tail->tail_size = bytes;
			}
			undo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
			undo->head.hdr_type = HAMMER_HEAD_TYPE_PAD;
			undo->head.hdr_size = bytes;
			/* NO CRC OR SEQ NO */
			undomap->next_offset += bytes;
			hammer_modify_buffer_done(buffer);
			hammer_stats_undo += bytes;
			continue;
		}

		/*
		 * Calculate the actual payload and recalculate the size
		 * of the media structure as necessary.
		 */
		if (n > len) {
			n = len;
			bytes = ((n + HAMMER_HEAD_ALIGN_MASK) &
				 ~HAMMER_HEAD_ALIGN_MASK) +
				(int)sizeof(struct hammer_fifo_undo) +
				(int)sizeof(struct hammer_fifo_tail);
		}
		if (hammer_debug_general & 0x0080) {
			kprintf("undo %016llx %d %d\n",
				(long long)next_offset, bytes, n);
		}

		undo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
		undo->head.hdr_type = HAMMER_HEAD_TYPE_UNDO;
		undo->head.hdr_size = bytes;
		undo->head.hdr_seq = hmp->undo_seqno++;
		/* The CRC field is zeroed here and filled in further below. */
		undo->head.hdr_crc = 0;
		undo->undo_offset = zone_off;
		undo->undo_data_bytes = n;
		bcopy(base, undo + 1, n);

		tail = (void *)((char *)undo + bytes - sizeof(*tail));
		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_UNDO;
		tail->tail_size = bytes;

		KKASSERT(bytes >= sizeof(undo->head));
		undo->head.hdr_crc = crc32(undo, HAMMER_FIFO_HEAD_CRCOFF) ^
			     crc32(&undo->head + 1, bytes - sizeof(undo->head));
		undomap->next_offset += bytes;
		hammer_stats_undo += bytes;

		/*
		 * Before we finish off the buffer we have to deal with any
		 * junk between the end of the media structure we just laid
		 * down and the UNDO alignment boundary.  We do this by laying
		 * down a dummy PAD.  Even though we will probably overwrite
		 * it almost immediately we have to do this so recovery runs
		 * can iterate the UNDO space without having to depend on
		 * the indices in the volume header.
		 *
		 * This dummy PAD will be overwritten on the next undo so
		 * we do not adjust undomap->next_offset.
		 */
		bytes = HAMMER_UNDO_ALIGN -
			((int)undomap->next_offset & HAMMER_UNDO_MASK);
		if (bytes != HAMMER_UNDO_ALIGN) {
			KKASSERT(bytes >= sizeof(struct hammer_fifo_tail));
			/* The PAD starts right after the tail just written. */
			undo = (void *)(tail + 1);
			tail = (void *)((char *)undo + bytes - sizeof(*tail));
			if ((void *)undo != (void *)tail) {
				tail->tail_signature = HAMMER_TAIL_SIGNATURE;
				tail->tail_type = HAMMER_HEAD_TYPE_PAD;
				tail->tail_size = bytes;
			}
			undo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
			undo->head.hdr_type = HAMMER_HEAD_TYPE_PAD;
			undo->head.hdr_size = bytes;
			/* NO CRC OR SEQ NO */
		}
		hammer_modify_buffer_done(buffer);

		/*
		 * Adjust for loop
		 */
		len -= n;
		base = (char *)base + n;
		zone_off += n;
	}
	hammer_modify_volume_done(root_volume);
	hammer_unlock(&hmp->undo_lock);

	if (buffer)
		hammer_rel_buffer(buffer, 0);
	return(error);
}
/*
 * HAMMER version 4+ conversion support.
 *
 * Convert a HAMMER version < 4 UNDO FIFO area to a 4+ UNDO FIFO area.
 * The 4+ UNDO FIFO area is backwards compatible.  The conversion is
 * needed to initialize the sequence space and place headers on the
 * new 512-byte undo boundary.
 *
 * Resets next_offset/first_offset of both the in-core and the on-disk
 * undo blockmap to the start of the UNDO zone, then fills the zone up to
 * the on-disk alloc_offset with HAMMER_UNDO_ALIGN sized DUMMY records
 * carrying consecutive sequence numbers starting at 0.  hmp->undo_seqno
 * is left at the next sequence number to use.
 *
 * Returns 0 on success or the error reported through hammer_bnew().
 */
int
hammer_upgrade_undo_4(hammer_transaction_t trans)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t undomap;
	hammer_buffer_t buffer = NULL;
	hammer_fifo_head_t head;
	hammer_fifo_tail_t tail;
	hammer_off_t next_offset;
	u_int32_t seqno;
	int error = 0;	/* the loop below may legitimately not run */
	int bytes;

	hmp = trans->hmp;

	root_volume = trans->rootvol;

	/* no undo recursion */
	hammer_lock_ex(&hmp->undo_lock);
	hammer_modify_volume_noundo(NULL, root_volume);

	/*
	 * Adjust the in-core undomap and the on-disk undomap.
	 */
	next_offset = HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0);
	undomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
	undomap->next_offset = next_offset;
	undomap->first_offset = next_offset;

	undomap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	undomap->next_offset = next_offset;
	undomap->first_offset = next_offset;

	/*
	 * Loop over the entire UNDO space creating DUMMY entries.  Sequence
	 * numbers are assigned.
	 *
	 * From here on undomap refers to the on-disk blockmap entry, so
	 * the loop is bounded by the on-disk alloc_offset.
	 */
	seqno = 0;
	bytes = HAMMER_UNDO_ALIGN;

	while (next_offset != undomap->alloc_offset) {
		head = hammer_bnew(hmp, next_offset, &error, &buffer);
		if (error)
			break;
		hammer_modify_buffer_noundo(NULL, buffer);

		head->hdr_signature = HAMMER_HEAD_SIGNATURE;
		head->hdr_type = HAMMER_HEAD_TYPE_DUMMY;
		head->hdr_size = bytes;
		head->hdr_seq = seqno;
		/* The CRC field is zeroed here and filled in just below. */
		head->hdr_crc = 0;

		tail = (void *)((char *)head + bytes - sizeof(*tail));
		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_DUMMY;
		tail->tail_size = bytes;

		head->hdr_crc = crc32(head, HAMMER_FIFO_HEAD_CRCOFF) ^
				crc32(head + 1, bytes - sizeof(*head));
		hammer_modify_buffer_done(buffer);

		hammer_stats_undo += bytes;
		next_offset += HAMMER_UNDO_ALIGN;
		++seqno;
	}

	/*
	 * The sequence number will be the next sequence number to lay down.
	 */
	hmp->undo_seqno = seqno;
	kprintf("version upgrade seqno start %08x\n", seqno);

	hammer_modify_volume_done(root_volume);
	hammer_unlock(&hmp->undo_lock);

	if (buffer)
		hammer_rel_buffer(buffer, 0);
	return (error);
}
/*
 * Reblock a B-Tree internal node by copying it into a newly allocated
 * node.  Whatever pointed at the old node (the parent element, or the
 * root volume header when there is no parent element) is switched to the
 * copy, and each child is told about its new parent.
 *
 * elm is a pointer to the parent element pointing at cursor.node, or
 * NULL when cursor.node is the root of the B-Tree.
 *
 * The children are locked for the duration of the move.  On return the
 * cursor references the new node, which is left locked.
 */
static int
hammer_reblock_int_node(struct hammer_ioc_reblock *reblock,
			hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	struct hammer_node_lock lockroot;
	hammer_btree_elm_t child;
	hammer_node_t onode;
	hammer_node_t nnode;
	int error;
	int idx;

	hammer_node_lock_init(&lockroot, cursor->node);
	error = hammer_btree_lock_children(cursor, 1, &lockroot, NULL);
	if (error)
		goto done;

	onode = cursor->node;
	nnode = hammer_alloc_btree(cursor->trans, 0, &error);
	if (nnode == NULL)
		goto done;

	/*
	 * Duplicate the node image into the new location.
	 */
	hammer_lock_ex(&nnode->lock);
	hammer_modify_node_noundo(cursor->trans, nnode);
	bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk));

	/*
	 * Redirect the downward pointer to the copy before fixing up
	 * the children.
	 */
	if (elm == NULL) {
		/* Root node: the volume header carries the pointer. */
		hammer_volume_t volume;

		volume = hammer_get_root_volume(cursor->trans->hmp, &error);
		KKASSERT(error == 0);

		hammer_modify_volume_field(cursor->trans, volume,
					   vol0_btree_root);
		volume->ondisk->vol0_btree_root = nnode->node_offset;
		hammer_modify_volume_done(volume);
		hammer_rel_volume(volume, 0);
	} else {
		/* Non-root node: patch the parent's element. */
		hammer_modify_node(cursor->trans, cursor->parent,
				   &elm->internal.subtree_offset,
				   sizeof(elm->internal.subtree_offset));
		elm->internal.subtree_offset = nnode->node_offset;
		hammer_modify_node_done(cursor->parent);
	}

	/*
	 * Point every child back at the new copy.
	 */
	idx = 0;
	while (idx < nnode->ondisk->count) {
		child = &nnode->ondisk->elms[idx];
		error = btree_set_parent(cursor->trans, nnode, child);
		if (error)
			panic("reblock internal node: fixup problem");
		++idx;
	}

	/*
	 * Swap the new node into the cursor and retire the old one.  The
	 * cursor expects its node to be locked, so nnode stays locked
	 * while onode is unlocked and released.
	 */
	hammer_cursor_replaced_node(onode, nnode);
	hammer_delete_node(cursor->trans, onode);

	if (hammer_debug_general & 0x4000) {
		kprintf("REBLOCK INODE %016llx -> %016llx\n",
			(long long)onode->node_offset,
			(long long)nnode->node_offset);
	}
	hammer_modify_node_done(nnode);
	cursor->node = nnode;

	hammer_unlock(&onode->lock);
	hammer_rel_node(onode);

done:
	hammer_btree_unlock_children(cursor->trans->hmp, &lockroot, NULL);
	return (error);
}
/*
 * Reblock a B-Tree leaf node by copying it into a newly allocated node
 * and switching whatever pointed at the old leaf (the parent element, or
 * the root volume header when there is no parent element) to the copy.
 *
 * elm is a pointer to the parent element pointing at cursor.node, or
 * NULL when cursor.node is the root of the B-Tree.
 *
 * On return the cursor references the new node, which is left locked.
 */
static int
hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock,
			 hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	hammer_node_t src;
	hammer_node_t dst;
	int error;

	/*
	 * No allocation hint is given for the leaf; fills are done from
	 * the leaf upwards.
	 */
	src = cursor->node;
	dst = hammer_alloc_btree(cursor->trans, 0, &error);
	if (dst == NULL)
		return (error);

	/*
	 * Duplicate the node image into the new location.
	 */
	hammer_lock_ex(&dst->lock);
	hammer_modify_node_noundo(cursor->trans, dst);
	bcopy(src->ondisk, dst->ondisk, sizeof(*dst->ondisk));

	if (elm == NULL) {
		/* Root node: the volume header carries the pointer. */
		hammer_volume_t volume;

		volume = hammer_get_root_volume(cursor->trans->hmp, &error);
		KKASSERT(error == 0);

		hammer_modify_volume_field(cursor->trans, volume,
					   vol0_btree_root);
		volume->ondisk->vol0_btree_root = dst->node_offset;
		hammer_modify_volume_done(volume);
		hammer_rel_volume(volume, 0);
	} else {
		/* Non-root node: patch the parent's element. */
		hammer_modify_node(cursor->trans, cursor->parent,
				   &elm->internal.subtree_offset,
				   sizeof(elm->internal.subtree_offset));
		elm->internal.subtree_offset = dst->node_offset;
		hammer_modify_node_done(cursor->parent);
	}

	/*
	 * Swap the new node into the cursor and retire the old one.
	 */
	hammer_cursor_replaced_node(src, dst);
	hammer_delete_node(cursor->trans, src);

	if (hammer_debug_general & 0x4000) {
		kprintf("REBLOCK LNODE %016llx -> %016llx\n",
			(long long)src->node_offset,
			(long long)dst->node_offset);
	}
	hammer_modify_node_done(dst);
	cursor->node = dst;

	hammer_unlock(&src->lock);
	hammer_rel_node(src);

	return (error);
}
/*
 * Add a volume.
 *
 * Formats the device named by ioc->device_name with a volume header
 * using the first unused volume number, installs it into the mount,
 * bumps vol_count in every volume header, formats the new volume's
 * freemap and folds its bigblock totals into the root volume statistics
 * and the mount's statfs/statvfs block counts.
 *
 * Returns EINVAL on a read-only mount or when no more volumes can be
 * added, EAGAIN when another volume operation holds the volume lock, or
 * the error from setting up, formatting or installing the device.
 */
int
hammer_ioc_volume_add(hammer_transaction_t trans, hammer_inode_t ip,
		struct hammer_ioc_volume *ioc)
{
	struct hammer_mount *hmp = trans->hmp;
	struct mount *mp = hmp->mp;
	struct vnode *devvp = NULL;
	struct bigblock_stat stat;
	hammer_volume_t volume;
	int new_vol_no;
	int i;
	int error;

	if (mp->mnt_flag & MNT_RDONLY) {
		kprintf("Cannot add volume to read-only HAMMER filesystem\n");
		return (EINVAL);
	}

	if (hmp->nvolumes + 1 >= HAMMER_MAX_VOLUMES) {
		kprintf("Max number of HAMMER volumes exceeded\n");
		return (EINVAL);
	}

	if (hammer_lock_ex_try(&hmp->volume_lock) != 0) {
		kprintf("Another volume operation is in progress!\n");
		return (EAGAIN);
	}

	/*
	 * Pick the lowest volume number that is not in the volume tree.
	 */
	for (new_vol_no = 0; new_vol_no < HAMMER_MAX_VOLUMES; ++new_vol_no) {
		if (!RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root,
			       new_vol_no))
			break;
	}
	if (new_vol_no >= HAMMER_MAX_VOLUMES) {
		kprintf("Max number of HAMMER volumes exceeded\n");
		hammer_unlock(&hmp->volume_lock);
		return (EINVAL);
	}

	/*
	 * Open the device, write a fresh volume header and close it
	 * again before installing the volume into the mount.
	 */
	error = hammer_setup_device(&devvp, ioc->device_name, 0);
	if (error)
		goto end;
	KKASSERT(devvp);

	error = hammer_format_volume_header(
		hmp,
		devvp,
		hmp->rootvol->ondisk->vol_name,
		new_vol_no,
		hmp->nvolumes+1,
		ioc->vol_size,
		ioc->boot_area_size,
		ioc->mem_area_size);
	hammer_close_device(&devvp, 0);
	if (error)
		goto end;

	error = hammer_install_volume(hmp, ioc->device_name, NULL);
	if (error)
		goto end;

	hammer_sync_lock_sh(trans);
	hammer_lock_ex(&hmp->blkmap_lock);

	++hmp->nvolumes;

	/*
	 * Propagate the new volume count into every volume header.
	 */
	for (i = 0; i < HAMMER_MAX_VOLUMES; ++i) {
		volume = hammer_get_volume(hmp, i, &error);
		if (volume == NULL && error == ENOENT) {
			/* Volume number not in use, nothing to update. */
			error = 0;
			continue;
		}
		KKASSERT(volume != NULL && error == 0);

		hammer_modify_volume_field(trans, volume, vol_count);
		volume->ondisk->vol_count = hmp->nvolumes;
		hammer_modify_volume_done(volume);

		/*
		 * Only changes to the header of the root volume
		 * are automatically flushed to disk.  For all
		 * other volumes that we modify we do it here.
		 *
		 * No interlock is needed, volume buffers are not
		 * messed with by bioops.
		 */
		if (volume != trans->rootvol && volume->io.modified) {
			hammer_crc_set_volume(volume->ondisk);
			hammer_io_flush(&volume->io, 0);
		}

		hammer_rel_volume(volume, 0);
	}

	/*
	 * Format the freemap of the volume we just added; stat receives
	 * the bigblock totals used for the statistics updates below.
	 */
	volume = hammer_get_volume(hmp, new_vol_no, &error);
	KKASSERT(volume != NULL && error == 0);

	error = hammer_format_freemap(trans, volume, &stat);
	KKASSERT(error == 0);

	/*
	 * Account for the additional bigblocks in the root volume.
	 */
	hammer_modify_volume_field(trans, trans->rootvol,
		vol0_stat_bigblocks);
	trans->rootvol->ondisk->vol0_stat_bigblocks += stat.total_bigblocks;
	hammer_modify_volume_done(trans->rootvol);

	/*
	 * The bigblock total changed, so refresh the block counts that
	 * are reported through the mount structure.
	 */
	mp->mnt_stat.f_blocks = trans->rootvol->ondisk->vol0_stat_bigblocks *
		(HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE);
	mp->mnt_vstat.f_blocks = trans->rootvol->ondisk->vol0_stat_bigblocks *
		(HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE);

	/*
	 * Account for the additional free bigblocks, keeping the copy
	 * cached in hmp in step with the root volume.
	 */
	hammer_modify_volume_field(trans, trans->rootvol,
		vol0_stat_freebigblocks);
	trans->rootvol->ondisk->vol0_stat_freebigblocks +=
		stat.total_free_bigblocks;
	hmp->copy_stat_freebigblocks =
		trans->rootvol->ondisk->vol0_stat_freebigblocks;
	hammer_modify_volume_done(trans->rootvol);

	hammer_rel_volume(volume, 0);

	hammer_unlock(&hmp->blkmap_lock);
	hammer_sync_unlock(trans);

	KKASSERT(error == 0);
end:
	hammer_unlock(&hmp->volume_lock);
	if (error)
		kprintf("An error occurred: %d\n", error);
	return (error);
}
/*
 * Remove a volume.
 *
 * Looks up the volume whose vol_name matches ioc->device_name, reblocks
 * the whole key space with that volume marked in hmp->volume_to_remove,
 * syncs the flusher, frees the volume's freemap, unloads its buffers and
 * the volume itself, decrements vol_count in every remaining volume
 * header, subtracts the volume's bigblock totals from the root volume
 * statistics and finally clears the volume header on the device.
 *
 * Returns EINVAL on a read-only mount, when the volume cannot be found
 * or when it is the root volume; EAGAIN when another volume operation
 * holds the volume lock; EINTR when the reblock was interrupted;
 * otherwise 0 or the error of the step that failed.
 *
 * The device opened for clearing the header is closed on both the
 * success and the failure path of hammer_clear_volume_header().
 */
int
hammer_ioc_volume_del(hammer_transaction_t trans, hammer_inode_t ip,
		struct hammer_ioc_volume *ioc)
{
	struct hammer_mount *hmp = trans->hmp;
	struct mount *mp = hmp->mp;
	struct hammer_ioc_reblock reblock;
	struct bigblock_stat stat;
	struct vnode *devvp = NULL;
	hammer_volume_t volume;
	int error = 0;
	int count;

	if (mp->mnt_flag & MNT_RDONLY) {
		kprintf("Cannot del volume from read-only HAMMER filesystem\n");
		return (EINVAL);
	}

	if (hammer_lock_ex_try(&hmp->volume_lock) != 0) {
		kprintf("Another volume operation is in progress!\n");
		return (EAGAIN);
	}

	volume = NULL;

	/*
	 * find volume by volname
	 */
	for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
		volume = hammer_get_volume(hmp, vol_no, &error);
		if (volume == NULL && error == ENOENT) {
			/*
			 * Skip unused volume numbers
			 */
			error = 0;
			continue;
		}
		KKASSERT(volume != NULL && error == 0);
		if (strcmp(volume->vol_name, ioc->device_name) == 0) {
			/* Found it; keep the reference we just acquired. */
			break;
		}
		hammer_rel_volume(volume, 0);
		volume = NULL;
	}

	if (volume == NULL) {
		kprintf("Couldn't find volume\n");
		error = EINVAL;
		goto end;
	}

	if (volume == trans->rootvol) {
		kprintf("Cannot remove root-volume\n");
		hammer_rel_volume(volume, 0);
		error = EINVAL;
		goto end;
	}

	/*
	 * Flag the volume as the one being removed and reblock the entire
	 * key space with a free_level of 0.
	 * NOTE(review): presumably this is what migrates data off the
	 * volume -- confirm against hammer_ioc_reblock().
	 */
	hmp->volume_to_remove = volume->vol_no;

	bzero(&reblock, sizeof(reblock));

	reblock.key_beg.localization = HAMMER_MIN_LOCALIZATION;
	reblock.key_beg.obj_id = HAMMER_MIN_OBJID;
	reblock.key_end.localization = HAMMER_MAX_LOCALIZATION;
	reblock.key_end.obj_id = HAMMER_MAX_OBJID;
	reblock.head.flags = HAMMER_IOC_DO_FLAGS;
	reblock.free_level = 0;

	error = hammer_ioc_reblock(trans, ip, &reblock);

	/* An interrupted reblock is reported through the head flags. */
	if (reblock.head.flags & HAMMER_IOC_HEAD_INTR) {
		error = EINTR;
	}

	if (error) {
		if (error == EINTR) {
			kprintf("reblock was interrupted\n");
		} else {
			kprintf("reblock failed: %d\n", error);
		}
		hmp->volume_to_remove = -1;
		hammer_rel_volume(volume, 0);
		goto end;
	}

	/*
	 * Sync filesystem.  From the fifth pass on we sleep between
	 * passes and print progress; we stop trying after 30 passes.
	 */
	count = 0;
	while (hammer_flusher_haswork(hmp)) {
		hammer_flusher_sync(hmp);
		++count;
		if (count >= 5) {
			if (count == 5)
				kprintf("HAMMER: flushing.");
			else
				kprintf(".");
			tsleep(&count, 0, "hmrufl", hz);
		}
		if (count == 30) {
			kprintf("giving up");
			break;
		}
	}
	kprintf("\n");

	hammer_sync_lock_sh(trans);
	hammer_lock_ex(&hmp->blkmap_lock);

	/*
	 * We use stat later to update rootvol's bigblock stats
	 */
	error = hammer_free_freemap(trans, volume, &stat);
	if (error) {
		kprintf("Failed to free volume. Volume not empty!\n");
		hmp->volume_to_remove = -1;
		hammer_rel_volume(volume, 0);
		hammer_unlock(&hmp->blkmap_lock);
		hammer_sync_unlock(trans);
		goto end;
	}

	hmp->volume_to_remove = -1;

	hammer_rel_volume(volume, 0);

	/*
	 * Unload buffers
	 */
	RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
		hammer_unload_buffer, volume);

	error = hammer_unload_volume(volume, NULL);
	if (error == -1) {
		kprintf("Failed to unload volume\n");
		hammer_unlock(&hmp->blkmap_lock);
		hammer_sync_unlock(trans);
		goto end;
	}

	volume = NULL;
	--hmp->nvolumes;

	/*
	 * Set each volume's new value of the vol_count field.
	 */
	for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
		volume = hammer_get_volume(hmp, vol_no, &error);
		if (volume == NULL && error == ENOENT) {
			/*
			 * Skip unused volume numbers
			 */
			error = 0;
			continue;
		}

		KKASSERT(volume != NULL && error == 0);
		hammer_modify_volume_field(trans, volume, vol_count);
		volume->ondisk->vol_count = hmp->nvolumes;
		hammer_modify_volume_done(volume);

		/*
		 * Only changes to the header of the root volume
		 * are automatically flushed to disk.  For all
		 * other volumes that we modify we do it here.
		 *
		 * No interlock is needed, volume buffers are not
		 * messed with by bioops.
		 */
		if (volume != trans->rootvol && volume->io.modified) {
			hammer_crc_set_volume(volume->ondisk);
			hammer_io_flush(&volume->io, 0);
		}

		hammer_rel_volume(volume, 0);
	}

	/*
	 * Update the total number of bigblocks
	 */
	hammer_modify_volume_field(trans, trans->rootvol,
		vol0_stat_bigblocks);
	trans->rootvol->ondisk->vol0_stat_bigblocks -= stat.total_bigblocks;
	hammer_modify_volume_done(trans->rootvol);

	/*
	 * Update the number of free bigblocks
	 * (including the copy in hmp)
	 */
	hammer_modify_volume_field(trans, trans->rootvol,
		vol0_stat_freebigblocks);
	trans->rootvol->ondisk->vol0_stat_freebigblocks -=
		stat.total_free_bigblocks;
	hmp->copy_stat_freebigblocks =
		trans->rootvol->ondisk->vol0_stat_freebigblocks;
	hammer_modify_volume_done(trans->rootvol);

	/*
	 * Bigblock count changed so recompute the total number of blocks.
	 */
	mp->mnt_stat.f_blocks = trans->rootvol->ondisk->vol0_stat_bigblocks *
		(HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE);
	mp->mnt_vstat.f_blocks = trans->rootvol->ondisk->vol0_stat_bigblocks *
		(HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE);

	hammer_unlock(&hmp->blkmap_lock);
	hammer_sync_unlock(trans);

	/*
	 * Erase the volume header of the removed device.
	 *
	 * This is to not accidentally mount the volume again.
	 */
	error = hammer_setup_device(&devvp, ioc->device_name, 0);
	if (error) {
		kprintf("Failed to open device: %s\n", ioc->device_name);
		goto end;
	}
	KKASSERT(devvp);

	/*
	 * Close the device before looking at the result so the vnode is
	 * not left open when clearing the header fails.
	 */
	error = hammer_clear_volume_header(devvp);
	hammer_close_device(&devvp, 0);
	if (error) {
		kprintf("Failed to clear volume header of device: %s\n",
			ioc->device_name);
		goto end;
	}

end:
	hammer_unlock(&hmp->volume_lock);
	return (error);
}