/*
 * Generate UNDO record(s) for the block of data at the specified zone1
 * or zone2 offset.
 *
 * The recovery code will execute UNDOs in reverse order, allowing overlaps.
 * All the UNDOs are executed together so if we already laid one down we
 * do not have to lay another one down for the same range.
 *
 * For HAMMER version 4+ UNDO a 512 byte boundary is enforced and a PAD
 * will be laid down for any unused space.  UNDO FIFO media structures
 * will implement the hdr_seq field (it used to be reserved01), and
 * both flush and recovery mechanics will be very different.
 *
 * WARNING!  See also hammer_generate_redo() in hammer_redo.c
 */
int
hammer_generate_undo(hammer_transaction_t trans,
		     hammer_off_t zone_off, void *base, int len)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t undomap;
	hammer_buffer_t buffer = NULL;
	hammer_fifo_undo_t undo;
	hammer_fifo_tail_t tail;
	hammer_off_t next_offset;
	int error;
	int bytes;
	int n;

	hmp = trans->hmp;

	/*
	 * A SYNC record may be required before we can lay down a general
	 * UNDO.  This ensures that the nominal recovery span contains
	 * at least one SYNC record telling the recovery code how far
	 * out-of-span it must go to run the REDOs.
	 */
	if ((hmp->flags & HAMMER_MOUNT_REDO_SYNC) == 0 &&
	    hmp->version >= HAMMER_VOL_VERSION_FOUR) {
		hammer_generate_redo_sync(trans);
	}

	/*
	 * Enter the offset into our undo history.  If there is an existing
	 * undo we do not have to generate a new one.
	 */
	if (hammer_enter_undo_history(hmp, zone_off, len) == EALREADY)
		return(0);

	root_volume = trans->rootvol;
	undomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];

	/* no undo recursion */
	hammer_modify_volume_noundo(NULL, root_volume);
	hammer_lock_ex(&hmp->undo_lock);

	/* undo had better not roll over (loose test) */
	if (hammer_undo_space(trans) < len + HAMMER_BUFSIZE*3)
		panic("hammer: insufficient undo FIFO space!");

	/*
	 * Loop until the undo for the entire range has been laid down.
	 */
	while (len) {
		/*
		 * Fetch the layout offset in the UNDO FIFO, wrap it as
		 * necessary.
		 */
		if (undomap->next_offset == undomap->alloc_offset) {
			undomap->next_offset =
				HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0);
		}
		next_offset = undomap->next_offset;

		/*
		 * This is a tail-chasing FIFO, when we hit the start of a new
		 * buffer we don't have to read it in.
		 */
		if ((next_offset & HAMMER_BUFMASK) == 0) {
			undo = hammer_bnew(hmp, next_offset, &error, &buffer);
			hammer_format_undo(undo, hmp->undo_seqno ^ 0x40000000);
		} else {
			undo = hammer_bread(hmp, next_offset, &error, &buffer);
		}
		if (error)
			break;

		/* no undo recursion */
		hammer_modify_buffer_noundo(NULL, buffer);

		/*
		 * Calculate how big a media structure fits up to the next
		 * alignment point and how large a data payload we can
		 * accommodate.
		 *
		 * If n calculates to 0 or negative there is no room for
		 * anything but a PAD.
		 */
		bytes = HAMMER_UNDO_ALIGN -
			((int)next_offset & HAMMER_UNDO_MASK);
		n = bytes -
		    (int)sizeof(struct hammer_fifo_undo) -
		    (int)sizeof(struct hammer_fifo_tail);

		/*
		 * If available space is insufficient for any payload
		 * we have to lay down a PAD.
		 *
		 * The minimum PAD is 8 bytes and the head and tail will
		 * overlap each other in that case.  PADs do not have
		 * sequence numbers or CRCs.
		 *
		 * A PAD may not start on a boundary.  That is, every
		 * 512-byte block in the UNDO/REDO FIFO must begin with
		 * a record containing a sequence number.
		 */
		if (n <= 0) {
			KKASSERT(bytes >= sizeof(struct hammer_fifo_tail));
			KKASSERT(((int)next_offset & HAMMER_UNDO_MASK) != 0);
			tail = (void *)((char *)undo + bytes - sizeof(*tail));
			if ((void *)undo != (void *)tail) {
				tail->tail_signature = HAMMER_TAIL_SIGNATURE;
				tail->tail_type = HAMMER_HEAD_TYPE_PAD;
				tail->tail_size = bytes;
			}
			undo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
			undo->head.hdr_type = HAMMER_HEAD_TYPE_PAD;
			undo->head.hdr_size = bytes;
			/* NO CRC OR SEQ NO */
			undomap->next_offset += bytes;
			hammer_modify_buffer_done(buffer);
			hammer_stats_undo += bytes;
			continue;
		}

		/*
		 * Calculate the actual payload and recalculate the size
		 * of the media structure as necessary.
		 */
		if (n > len) {
			n = len;
			bytes = ((n + HAMMER_HEAD_ALIGN_MASK) &
				 ~HAMMER_HEAD_ALIGN_MASK) +
				(int)sizeof(struct hammer_fifo_undo) +
				(int)sizeof(struct hammer_fifo_tail);
		}
		if (hammer_debug_general & 0x0080) {
			kprintf("undo %016llx %d %d\n",
				(long long)next_offset, bytes, n);
		}

		undo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
		undo->head.hdr_type = HAMMER_HEAD_TYPE_UNDO;
		undo->head.hdr_size = bytes;
		undo->head.hdr_seq = hmp->undo_seqno++;
		undo->head.hdr_crc = 0;
		undo->undo_offset = zone_off;
		undo->undo_data_bytes = n;
		bcopy(base, undo + 1, n);

		tail = (void *)((char *)undo + bytes - sizeof(*tail));
		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_UNDO;
		tail->tail_size = bytes;

		KKASSERT(bytes >= sizeof(undo->head));
		undo->head.hdr_crc = crc32(undo, HAMMER_FIFO_HEAD_CRCOFF) ^
			crc32(&undo->head + 1, bytes - sizeof(undo->head));
		undomap->next_offset += bytes;
		hammer_stats_undo += bytes;

		/*
		 * Before we finish off the buffer we have to deal with any
		 * junk between the end of the media structure we just laid
		 * down and the UNDO alignment boundary.  We do this by laying
		 * down a dummy PAD.  Even though we will probably overwrite
		 * it almost immediately we have to do this so recovery runs
		 * can iterate the UNDO space without having to depend on
		 * the indices in the volume header.
		 *
		 * This dummy PAD will be overwritten on the next undo so
		 * we do not adjust undomap->next_offset.
		 */
		bytes = HAMMER_UNDO_ALIGN -
			((int)undomap->next_offset & HAMMER_UNDO_MASK);
		if (bytes != HAMMER_UNDO_ALIGN) {
			KKASSERT(bytes >= sizeof(struct hammer_fifo_tail));
			undo = (void *)(tail + 1);
			tail = (void *)((char *)undo + bytes - sizeof(*tail));
			if ((void *)undo != (void *)tail) {
				tail->tail_signature = HAMMER_TAIL_SIGNATURE;
				tail->tail_type = HAMMER_HEAD_TYPE_PAD;
				tail->tail_size = bytes;
			}
			undo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
			undo->head.hdr_type = HAMMER_HEAD_TYPE_PAD;
			undo->head.hdr_size = bytes;
			/* NO CRC OR SEQ NO */
		}
		hammer_modify_buffer_done(buffer);

		/*
		 * Adjust for loop
		 */
		len -= n;
		base = (char *)base + n;
		zone_off += n;
	}
	hammer_modify_volume_done(root_volume);
	hammer_unlock(&hmp->undo_lock);

	if (buffer)
		hammer_rel_buffer(buffer, 0);
	return(error);
}
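/*
 * A worked example of the sizing math above (a sketch, compiled out, not
 * part of the driver).  It assumes HAMMER_UNDO_ALIGN is 512 and the head
 * alignment is 8 as described in the comments in this file; the EX_*
 * stand-in constants and structure sizes are illustrative only, the real
 * values come from hammer_disk.h.
 */
#if 0
#include <stdio.h>

#define EX_UNDO_ALIGN		512	/* stand-in for HAMMER_UNDO_ALIGN */
#define EX_UNDO_MASK		(EX_UNDO_ALIGN - 1)
#define EX_HEAD_ALIGN_MASK	7	/* stand-in for HAMMER_HEAD_ALIGN_MASK */
#define EX_UNDO_HEAD_SIZE	24	/* assumed sizeof(struct hammer_fifo_undo) */
#define EX_TAIL_SIZE		8	/* assumed sizeof(struct hammer_fifo_tail) */

static void
example_undo_sizing(int next_offset, int len)
{
	int bytes;	/* media space up to the next 512-byte boundary */
	int n;		/* maximum data payload that fits in that space */

	bytes = EX_UNDO_ALIGN - (next_offset & EX_UNDO_MASK);
	n = bytes - EX_UNDO_HEAD_SIZE - EX_TAIL_SIZE;
	if (n <= 0) {
		/* no room for any payload, a PAD fills out the boundary */
		printf("%d: PAD %d bytes\n", next_offset, bytes);
	} else {
		if (n > len) {
			/* shrink the record to the payload, 8-byte aligned */
			n = len;
			bytes = ((n + EX_HEAD_ALIGN_MASK) &
				 ~EX_HEAD_ALIGN_MASK) +
				EX_UNDO_HEAD_SIZE + EX_TAIL_SIZE;
		}
		printf("%d: UNDO %d bytes, payload %d\n",
		       next_offset, bytes, n);
	}
}
#endif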
/*
 * HAMMER version 4+ REDO support.
 *
 * REDO records are used to improve fsync() performance.  Instead of having
 * to go through a complete double-flush cycle involving at least two disk
 * synchronizations the fsync need only flush UNDO/REDO FIFO buffers through
 * the related REDO records, which is a single synchronization requiring
 * no track seeking.  If a recovery becomes necessary the recovery code
 * will generate logical data writes based on the REDO records encountered.
 * That is, the recovery code will UNDO any partial meta-data/data writes
 * at the raw disk block level and then REDO the data writes at the logical
 * level.
 */
int
hammer_generate_redo(hammer_transaction_t trans, hammer_inode_t ip,
		     hammer_off_t file_off, u_int32_t flags,
		     void *base, int len)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t undomap;
	hammer_buffer_t buffer = NULL;
	hammer_fifo_redo_t redo;
	hammer_fifo_tail_t tail;
	hammer_off_t next_offset;
	int error;
	int bytes;
	int n;

	/*
	 * Setup
	 */
	hmp = trans->hmp;
	root_volume = trans->rootvol;
	undomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];

	/*
	 * No undo recursion when modifying the root volume
	 */
	hammer_modify_volume(NULL, root_volume, NULL, 0);
	hammer_lock_ex(&hmp->undo_lock);

	/* undo had better not roll over (loose test) */
	if (hammer_undo_space(trans) < len + HAMMER_BUFSIZE*3)
		panic("hammer: insufficient undo FIFO space!");

	/*
	 * Loop until the undo for the entire range has been laid down.
	 * Loop at least once (len might be 0 as a degenerate case).
	 */
	for (;;) {
		/*
		 * Fetch the layout offset in the UNDO FIFO, wrap it as
		 * necessary.
		 */
		if (undomap->next_offset == undomap->alloc_offset) {
			undomap->next_offset =
				HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0);
		}
		next_offset = undomap->next_offset;

		/*
		 * This is a tail-chasing FIFO, when we hit the start of a new
		 * buffer we don't have to read it in.
		 */
		if ((next_offset & HAMMER_BUFMASK) == 0) {
			redo = hammer_bnew(hmp, next_offset, &error, &buffer);
			hammer_format_undo(redo, hmp->undo_seqno ^ 0x40000000);
		} else {
			redo = hammer_bread(hmp, next_offset, &error, &buffer);
		}
		if (error)
			break;
		hammer_modify_buffer(NULL, buffer, NULL, 0);

		/*
		 * Calculate how big a media structure fits up to the next
		 * alignment point and how large a data payload we can
		 * accommodate.
		 *
		 * If n calculates to 0 or negative there is no room for
		 * anything but a PAD.
		 */
		bytes = HAMMER_UNDO_ALIGN -
			((int)next_offset & HAMMER_UNDO_MASK);
		n = bytes -
		    (int)sizeof(struct hammer_fifo_redo) -
		    (int)sizeof(struct hammer_fifo_tail);

		/*
		 * If available space is insufficient for any payload
		 * we have to lay down a PAD.
		 *
		 * The minimum PAD is 8 bytes and the head and tail will
		 * overlap each other in that case.  PADs do not have
		 * sequence numbers or CRCs.
		 *
		 * A PAD may not start on a boundary.  That is, every
		 * 512-byte block in the UNDO/REDO FIFO must begin with
		 * a record containing a sequence number.
		 */
		if (n <= 0) {
			KKASSERT(bytes >= sizeof(struct hammer_fifo_tail));
			KKASSERT(((int)next_offset & HAMMER_UNDO_MASK) != 0);
			tail = (void *)((char *)redo + bytes - sizeof(*tail));
			if ((void *)redo != (void *)tail) {
				tail->tail_signature = HAMMER_TAIL_SIGNATURE;
				tail->tail_type = HAMMER_HEAD_TYPE_PAD;
				tail->tail_size = bytes;
			}
			redo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
			redo->head.hdr_type = HAMMER_HEAD_TYPE_PAD;
			redo->head.hdr_size = bytes;
			/* NO CRC OR SEQ NO */
			undomap->next_offset += bytes;
			hammer_modify_buffer_done(buffer);
			hammer_stats_redo += bytes;
			continue;
		}

		/*
		 * When generating an inode-related REDO record we track
		 * the point in the UNDO/REDO FIFO containing the inode's
		 * earliest REDO record.  See hammer_generate_redo_sync().
		 *
		 * redo_fifo_next is cleared when an inode is staged to
		 * the backend and then used to determine how to reassign
		 * redo_fifo_start after the inode flush completes.
		 */
		if (ip) {
			redo->redo_objid = ip->obj_id;
			redo->redo_localization = ip->obj_localization;
			if ((ip->flags & HAMMER_INODE_RDIRTY) == 0) {
				ip->redo_fifo_start = next_offset;
				if (RB_INSERT(hammer_redo_rb_tree,
					      &hmp->rb_redo_root, ip)) {
					panic("hammer_generate_redo: "
					      "cannot insert inode %p on "
					      "redo FIFO", ip);
				}
				ip->flags |= HAMMER_INODE_RDIRTY;
			}
			if (ip->redo_fifo_next == 0)
				ip->redo_fifo_next = next_offset;
		} else {
			redo->redo_objid = 0;
			redo->redo_localization = 0;
		}

		/*
		 * Calculate the actual payload and recalculate the size
		 * of the media structure as necessary.  If no data buffer
		 * is supplied there is no payload.
		 */
		if (base == NULL) {
			n = 0;
		} else if (n > len) {
			n = len;
		}
		bytes = ((n + HAMMER_HEAD_ALIGN_MASK) &
			 ~HAMMER_HEAD_ALIGN_MASK) +
			(int)sizeof(struct hammer_fifo_redo) +
			(int)sizeof(struct hammer_fifo_tail);
		if (hammer_debug_general & 0x0080) {
			kprintf("redo %016llx %d %d\n",
				(long long)next_offset, bytes, n);
		}

		redo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
		redo->head.hdr_type = HAMMER_HEAD_TYPE_REDO;
		redo->head.hdr_size = bytes;
		redo->head.hdr_seq = hmp->undo_seqno++;
		redo->head.hdr_crc = 0;
		redo->redo_mtime = trans->time;
		redo->redo_offset = file_off;
		redo->redo_flags = flags;

		/*
		 * Incremental payload.  If no payload we throw the entire
		 * len into redo_data_bytes and will not loop.
		 */
		if (base) {
			redo->redo_data_bytes = n;
			bcopy(base, redo + 1, n);
			len -= n;
			base = (char *)base + n;
			file_off += n;
		} else {
			redo->redo_data_bytes = len;
			file_off += len;
			len = 0;
		}

		tail = (void *)((char *)redo + bytes - sizeof(*tail));
		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_REDO;
		tail->tail_size = bytes;

		KKASSERT(bytes >= sizeof(redo->head));
		redo->head.hdr_crc = crc32(redo, HAMMER_FIFO_HEAD_CRCOFF) ^
			crc32(&redo->head + 1, bytes - sizeof(redo->head));
		undomap->next_offset += bytes;
		hammer_stats_redo += bytes;

		/*
		 * Before we finish off the buffer we have to deal with any
		 * junk between the end of the media structure we just laid
		 * down and the UNDO alignment boundary.  We do this by laying
		 * down a dummy PAD.  Even though we will probably overwrite
		 * it almost immediately we have to do this so recovery runs
		 * can iterate the UNDO space without having to depend on
		 * the indices in the volume header.
		 *
		 * This dummy PAD will be overwritten on the next undo so
		 * we do not adjust undomap->next_offset.
		 */
		bytes = HAMMER_UNDO_ALIGN -
			((int)undomap->next_offset & HAMMER_UNDO_MASK);
		if (bytes != HAMMER_UNDO_ALIGN) {
			KKASSERT(bytes >= sizeof(struct hammer_fifo_tail));
			redo = (void *)(tail + 1);
			tail = (void *)((char *)redo + bytes - sizeof(*tail));
			if ((void *)redo != (void *)tail) {
				tail->tail_signature = HAMMER_TAIL_SIGNATURE;
				tail->tail_type = HAMMER_HEAD_TYPE_PAD;
				tail->tail_size = bytes;
			}
			redo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
			redo->head.hdr_type = HAMMER_HEAD_TYPE_PAD;
			redo->head.hdr_size = bytes;
			/* NO CRC OR SEQ NO */
		}
		hammer_modify_buffer_done(buffer);
		if (len == 0)
			break;
	}
	hammer_modify_volume_done(root_volume);
	hammer_unlock(&hmp->undo_lock);

	if (buffer)
		hammer_rel_buffer(buffer, 0);

	/*
	 * Make sure the nominal undo span contains at least one REDO_SYNC,
	 * otherwise the REDO recovery will not be triggered.
	 */
	if ((hmp->flags & HAMMER_MOUNT_REDO_SYNC) == 0 &&
	    flags != HAMMER_REDO_SYNC) {
		hammer_generate_redo_sync(trans);
	}
	return(error);
}
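/*
 * Sketch (compiled out, not part of the driver) of how a recovery scan
 * could verify the hdr_crc laid down above: the CRC covers the head up
 * to HAMMER_FIFO_HEAD_CRCOFF XORed with the CRC of everything between
 * the head structure and the end of the record, so the hdr_crc field
 * itself is excluded.  The helper name is hypothetical; the real checks
 * live in the recovery code, and PAD records carry no CRC or sequence
 * number so a scanner would skip them.
 */
#if 0
static int
example_fifo_crc_ok(hammer_fifo_head_t head)
{
	hammer_crc_t crc;

	if (head->hdr_type == HAMMER_HEAD_TYPE_PAD)
		return (1);		/* PADs have no CRC or seqno */
	crc = crc32(head, HAMMER_FIFO_HEAD_CRCOFF) ^
	      crc32(head + 1, head->hdr_size - sizeof(*head));
	return (head->hdr_crc == crc);
}
#endif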
/*
 * HAMMER version 4+ conversion support.
 *
 * Convert a HAMMER version < 4 UNDO FIFO area to a 4+ UNDO FIFO area.
 * The 4+ UNDO FIFO area is backwards compatible.  The conversion is
 * needed to initialize the sequence space and place headers on the
 * new 512-byte undo boundary.
 */
int
hammer_upgrade_undo_4(hammer_transaction_t trans)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t undomap;
	hammer_buffer_t buffer = NULL;
	hammer_fifo_head_t head;
	hammer_fifo_tail_t tail;
	hammer_off_t next_offset;
	u_int32_t seqno;
	int error;
	int bytes;

	hmp = trans->hmp;
	root_volume = trans->rootvol;

	/* no undo recursion */
	hammer_lock_ex(&hmp->undo_lock);
	hammer_modify_volume_noundo(NULL, root_volume);

	/*
	 * Adjust the in-core undomap and the on-disk undomap.
	 */
	next_offset = HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0);
	undomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
	undomap->next_offset = next_offset;
	undomap->first_offset = next_offset;

	undomap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	undomap->next_offset = next_offset;
	undomap->first_offset = next_offset;

	/*
	 * Loop over the entire UNDO space creating DUMMY entries.  Sequence
	 * numbers are assigned.
	 */
	seqno = 0;
	bytes = HAMMER_UNDO_ALIGN;

	while (next_offset != undomap->alloc_offset) {
		head = hammer_bnew(hmp, next_offset, &error, &buffer);
		if (error)
			break;
		hammer_modify_buffer_noundo(NULL, buffer);

		head->hdr_signature = HAMMER_HEAD_SIGNATURE;
		head->hdr_type = HAMMER_HEAD_TYPE_DUMMY;
		head->hdr_size = bytes;
		head->hdr_seq = seqno;
		head->hdr_crc = 0;

		tail = (void *)((char *)head + bytes - sizeof(*tail));
		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_DUMMY;
		tail->tail_size = bytes;

		head->hdr_crc = crc32(head, HAMMER_FIFO_HEAD_CRCOFF) ^
			crc32(head + 1, bytes - sizeof(*head));
		hammer_modify_buffer_done(buffer);

		hammer_stats_undo += bytes;
		next_offset += HAMMER_UNDO_ALIGN;
		++seqno;
	}

	/*
	 * The sequence number will be the next sequence number to lay down.
	 */
	hmp->undo_seqno = seqno;
	kprintf("version upgrade seqno start %08x\n", seqno);

	hammer_modify_volume_done(root_volume);
	hammer_unlock(&hmp->undo_lock);

	if (buffer)
		hammer_rel_buffer(buffer, 0);
	return (error);
}
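/*
 * Sketch (compiled out): since the upgrade loop above emits one DUMMY
 * record per 512-byte alignment unit, the starting sequence number it
 * reports is simply the UNDO area size divided by HAMMER_UNDO_ALIGN.
 * The helper and the 128MB figure are illustrative only, not HAMMER
 * defaults.
 */
#if 0
static u_int32_t
example_upgrade_seqno_start(hammer_off_t undo_area_bytes)
{
	/* e.g. an assumed 128MB UNDO FIFO yields seqno start 0x40000 */
	return ((u_int32_t)(undo_area_bytes / HAMMER_UNDO_ALIGN));
}
#endif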