/*
 * Copy records from userland to the target mirror.
 *
 * The PFS is identified in the mirror structure.  The passed ip is just
 * some directory in the overall HAMMER filesystem and has nothing to
 * do with the PFS.  In fact, there might not even be a root directory for
 * the PFS yet!
 *
 * The mrecord stream is read from mirror->ubuf, starting at byte offset
 * mirror->count and bounded by mirror->size.  mirror->count is advanced
 * (aligned via HAMMER_HEAD_DOALIGN) past every record that is processed
 * without error, so on return it indicates how far the stream was consumed.
 *
 * Returns EINVAL if mirror->size is out of range.  In all other cases the
 * function returns 0 and any failure is reported through mirror->head.error
 * with HAMMER_IOC_HEAD_ERROR set in mirror->head.flags.
 */
int
hammer_ioc_mirror_write(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_mirror_rw *mirror)
{
	union hammer_ioc_mrecord_any mrec;
	struct hammer_cursor cursor;
	u_int32_t localization;
	int checkspace_count = 0;
	int error;
	int bytes;
	char *uptr;
	int seq;

	localization = (u_int32_t)mirror->pfs_id << 16;
	seq = trans->hmp->flusher.done;

	/*
	 * Validate the mirror structure and relocalize the tracking keys.
	 */
	if (mirror->size < 0 || mirror->size > 0x70000000)
		return(EINVAL);
	mirror->key_beg.localization &= HAMMER_LOCALIZE_MASK;
	mirror->key_beg.localization += localization;
	mirror->key_end.localization &= HAMMER_LOCALIZE_MASK;
	mirror->key_end.localization += localization;
	mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	mirror->key_cur.localization += localization;

	/*
	 * Set up our tracking cursor for the loop.  The tracking cursor
	 * is used to delete records that are no longer present on the
	 * master.  The last handled record at key_cur must be skipped.
	 *
	 * Only touch the cursor if it was initialized successfully.  On
	 * failure the record loop below is skipped (error != 0), the cursor
	 * is still released with hammer_done_cursor(), and the error is
	 * reported to userland like any other processing error.
	 */
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error == 0) {
		cursor.key_beg = mirror->key_cur;
		cursor.key_end = mirror->key_end;
		cursor.flags |= HAMMER_CURSOR_BACKEND;
		error = hammer_btree_first(&cursor);
		if (error == 0)
			cursor.flags |= HAMMER_CURSOR_ATEDISK;
		/* An empty key range is not an error. */
		if (error == ENOENT)
			error = 0;
	}

	/*
	 * Loop until our input buffer has been exhausted.
	 */
	while (error == 0 &&
	       mirror->count + sizeof(mrec.head) <= mirror->size) {
		/*
		 * Don't blow out the buffer cache.  Leave room for frontend
		 * cache as well.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		while (hammer_flusher_meta_halflimit(trans->hmp) ||
		       hammer_flusher_undo_exhausted(trans, 2)) {
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async_one(trans->hmp);
		}

		/*
		 * If there is insufficient free space it may be due to
		 * reserved bigblocks, which flushing might fix.  Give up
		 * with ENOSPC on the tenth shortage seen during this call.
		 */
		if (hammer_checkspace(trans->hmp, HAMMER_CHKSPC_MIRROR)) {
			if (++checkspace_count == 10) {
				error = ENOSPC;
				break;
			}
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async(trans->hmp, NULL);
		}

		/*
		 * Acquire and validate header.  At most sizeof(mrec) bytes
		 * are copied in; any record payload beyond that stays in
		 * userland and is referenced through uptr.
		 */
		if ((bytes = mirror->size - mirror->count) > sizeof(mrec))
			bytes = sizeof(mrec);
		uptr = (char *)mirror->ubuf + mirror->count;
		error = copyin(uptr, &mrec, bytes);
		if (error)
			break;
		if (mrec.head.signature != HAMMER_IOC_MIRROR_SIGNATURE) {
			error = EINVAL;
			break;
		}
		if (mrec.head.rec_size < sizeof(mrec.head) ||
		    mrec.head.rec_size > sizeof(mrec) + HAMMER_XBUFSIZE ||
		    mirror->count + mrec.head.rec_size > mirror->size) {
			error = EINVAL;
			break;
		}

		switch(mrec.head.type & HAMMER_MRECF_TYPE_MASK) {
		case HAMMER_MREC_TYPE_SKIP:
			if (mrec.head.rec_size != sizeof(mrec.skip))
				error = EINVAL;
			if (error == 0)
				error = hammer_ioc_mirror_write_skip(&cursor, &mrec.skip, mirror, localization);
			break;
		case HAMMER_MREC_TYPE_REC:
			if (mrec.head.rec_size < sizeof(mrec.rec))
				error = EINVAL;
			if (error == 0)
				error = hammer_ioc_mirror_write_rec(&cursor, &mrec.rec, mirror, localization, uptr + sizeof(mrec.rec));
			break;
		case HAMMER_MREC_TYPE_REC_NODATA:
		case HAMMER_MREC_TYPE_REC_BADCRC:
			/*
			 * Records with bad data payloads are ignored XXX.
			 * Records with no data payload have to be skipped
			 * (they shouldn't have been written in the first
			 * place).
			 */
			if (mrec.head.rec_size < sizeof(mrec.rec))
				error = EINVAL;
			break;
		case HAMMER_MREC_TYPE_PASS:
			if (mrec.head.rec_size != sizeof(mrec.rec))
				error = EINVAL;
			if (error == 0)
				error = hammer_ioc_mirror_write_pass(&cursor, &mrec.rec, mirror, localization);
			break;
		default:
			error = EINVAL;
			break;
		}

		/*
		 * Retry the current record on deadlock, otherwise setup
		 * for the next loop.  mirror->count is left untouched on
		 * deadlock so the same record is copied in again.
		 */
		if (error == EDEADLK) {
			while (error == EDEADLK) {
				hammer_sync_lock_sh(trans);
				hammer_recover_cursor(&cursor);
				error = hammer_cursor_upgrade(&cursor);
				hammer_sync_unlock(trans);
			}
		} else {
			/* EALREADY is treated as success for this record. */
			if (error == EALREADY)
				error = 0;
			if (error == 0) {
				mirror->count +=
					HAMMER_HEAD_DOALIGN(mrec.head.rec_size);
			}
		}
	}
	hammer_done_cursor(&cursor);

	/*
	 * cumulative error
	 */
	if (error) {
		mirror->head.flags |= HAMMER_IOC_HEAD_ERROR;
		mirror->head.error = error;
	}

	/*
	 * ioctls don't update the RW data structure if an error is returned,
	 * always return 0.
	 */
	return(0);
}
/*
 * Roll the given PFS back to (trunc_tid - 1) by removing everything whose
 * TID is greater than or equal to trunc_tid.  The MIRROR_FILTERED scan used
 * here only works properly if the PFS was never in no-mirror mode.
 *
 * The usual consumer is the slave-to-master upgrade path, which uses this
 * to discard partial syncs.  In theory any PFS (PFS#0 included) can be
 * rolled back, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN PRUNED, and to
 * older points only when they sit on a retained (pruning softlink)
 * boundary.
 *
 * Rollbacks destroy information.  If changing inode numbers is acceptable,
 * cpdup'ing a snapshot back onto the master is the better approach.
 *
 * Returns 0 on success or the first error that is not handled by an
 * internal EDEADLK restart of the scan.
 */
static int
hammer_pfs_rollback(hammer_transaction_t trans,
		    hammer_pseudofs_inmem_t pfsm,
		    hammer_tid_t trunc_tid)
{
	struct hammer_cmirror cmirror;
	struct hammer_cursor cursor;
	struct hammer_base_elm resume_key;
	int error;
	int flush_seq;

	bzero(&cmirror, sizeof(cmirror));
	bzero(&resume_key, sizeof(resume_key));

	/* Lowest possible key inside this PFS; updated as the scan advances. */
	resume_key.localization = HAMMER_MIN_LOCALIZATION | pfsm->localization;
	resume_key.obj_id = HAMMER_MIN_OBJID;
	resume_key.key = HAMMER_MIN_KEY;
	resume_key.create_tid = 1;
	resume_key.rec_type = HAMMER_MIN_RECTYPE;

	flush_seq = trans->hmp->flusher.done;

	/*
	 * Each pass builds a fresh cursor starting at resume_key.  A pass
	 * that ends in EDEADLK is simply started over.
	 */
	for (;;) {
		error = hammer_init_cursor(trans, &cursor, NULL, NULL);
		if (error) {
			hammer_done_cursor(&cursor);
			return(error);
		}

		cursor.key_beg = resume_key;
		cursor.key_end.localization =
			HAMMER_MAX_LOCALIZATION | pfsm->localization;
		cursor.key_end.obj_id = HAMMER_MAX_OBJID;
		cursor.key_end.key = HAMMER_MAX_KEY;
		cursor.key_end.create_tid = HAMMER_MAX_TID;
		cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;

		cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
		cursor.flags |= HAMMER_CURSOR_BACKEND;

		/*
		 * Restrict the scan to records created or modified at or
		 * after trunc_tid.  The TIDs still have to be checked per
		 * record, but the scan becomes much smaller.
		 */
		cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
		cursor.cmirror = &cmirror;
		cmirror.mirror_tid = trunc_tid;

		error = hammer_btree_first(&cursor);
		while (error == 0) {
			/*
			 * A pending signal aborts the rollback.
			 */
			error = hammer_signal_check(trans->hmp);
			if (error)
				break;

			/*
			 * Only leaf elements matter here.  Mirror-filtered
			 * scans can also hand back internal nodes (used
			 * elsewhere to generate SKIP mrecords); those are
			 * stepped over.
			 *
			 * WARNING: See warnings in hammer_unlock_cursor()
			 *	    function.
			 */
			cursor.flags |= HAMMER_CURSOR_ATEDISK;
			if (cursor.node->ondisk->type ==
			    HAMMER_BTREE_TYPE_LEAF) {
				resume_key =
				    cursor.node->ondisk->elms[cursor.index].base;
				error = hammer_pfs_delete_at_cursor(&cursor,
								    trunc_tid);
			}

			/* Throttle against the flusher before moving on. */
			while (hammer_flusher_meta_halflimit(trans->hmp) ||
			       hammer_flusher_undo_exhausted(trans, 2)) {
				hammer_unlock_cursor(&cursor);
				hammer_flusher_wait(trans->hmp, flush_seq);
				hammer_lock_cursor(&cursor);
				flush_seq = hammer_flusher_async_one(trans->hmp);
			}

			if (error == 0)
				error = hammer_btree_iterate(&cursor);
		}

		/* Running off the end of the range is the normal exit. */
		if (error == ENOENT)
			error = 0;
		hammer_done_cursor(&cursor);

		if (error != EDEADLK)
			return(error);
	}
}
/*
 * Prune ioctl: scan the requested key range backwards and destroy every
 * leaf element for which prune_should_delete() returns true.
 *
 * The user-supplied element array is copied into a kernel buffer for the
 * duration of the call; prune->elms is pointed back at the user array
 * before returning.  prune->key_cur tracks the element being examined so
 * a scan restarted after EDEADLK resumes from there.
 *
 * Returns EINVAL for a malformed request, the copyin() or cursor-init
 * error if either fails, and otherwise the scan result.  An EINTR from the
 * scan is converted to 0 with HAMMER_IOC_HEAD_INTR set in
 * prune->head.flags.
 */
int
hammer_ioc_prune(hammer_transaction_t trans, hammer_inode_t ip,
		 struct hammer_ioc_prune *prune)
{
	struct hammer_cursor cursor;
	hammer_btree_leaf_elm_t leaf;
	struct hammer_ioc_prune_elm *kern_elms;
	struct hammer_ioc_prune_elm *orig_elms;
	int error;
	int is_dirent;
	int elms_bytes;
	int flush_seq;

	/*
	 * Request validation.
	 */
	if (prune->nelms < 0 || prune->nelms > HAMMER_MAX_PRUNE_ELMS)
		return(EINVAL);
	if ((prune->key_beg.localization | prune->key_end.localization) &
	    HAMMER_LOCALIZE_PSEUDOFS_MASK) {
		return(EINVAL);
	}
	if (prune->key_beg.localization > prune->key_end.localization)
		return(EINVAL);
	if (prune->key_beg.localization == prune->key_end.localization &&
	    prune->key_beg.obj_id > prune->key_end.obj_id) {
		/* only obj_id is compared; key-space limitations */
		return(EINVAL);
	}
	if ((prune->head.flags & HAMMER_IOC_PRUNE_ALL) && prune->nelms)
		return(EINVAL);

	/* The backwards scan starts at the end of the requested range. */
	prune->key_cur.localization =
		(prune->key_end.localization & HAMMER_LOCALIZE_MASK) +
		ip->obj_localization;
	prune->key_cur.obj_id = prune->key_end.obj_id;
	prune->key_cur.key = HAMMER_MAX_KEY;

	/*
	 * Bring the element array into kernel memory.
	 */
	elms_bytes = sizeof(*kern_elms) * prune->nelms;
	orig_elms = prune->elms;
	kern_elms = kmalloc(elms_bytes, M_TEMP, M_WAITOK);
	error = copyin(orig_elms, kern_elms, elms_bytes);
	if (error != 0)
		goto out;
	prune->elms = kern_elms;

	flush_seq = trans->hmp->flusher.done;

	/*
	 * Scan backwards.  The whole pass is repeated when it ends with
	 * EDEADLK, resuming from prune->key_cur.
	 */
	do {
		error = hammer_init_cursor(trans, &cursor, NULL, NULL);
		if (error) {
			hammer_done_cursor(&cursor);
			goto out;
		}

		cursor.key_beg.localization =
			(prune->key_beg.localization & HAMMER_LOCALIZE_MASK) +
			ip->obj_localization;
		cursor.key_beg.obj_id = prune->key_beg.obj_id;
		cursor.key_beg.key = HAMMER_MIN_KEY;
		cursor.key_beg.create_tid = 1;
		cursor.key_beg.delete_tid = 0;
		cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
		cursor.key_beg.obj_type = 0;

		cursor.key_end.localization = prune->key_cur.localization;
		cursor.key_end.obj_id = prune->key_cur.obj_id;
		cursor.key_end.key = prune->key_cur.key;
		cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
		cursor.key_end.delete_tid = 0;
		cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
		cursor.key_end.obj_type = 0;

		cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
		cursor.flags |= HAMMER_CURSOR_BACKEND;

		/*
		 * PRUNING lets the B-Tree code clean up loose ends.  For
		 * now (XXX) that also requires holding the sync lock across
		 * the positioning and iteration calls.
		 */
		cursor.flags |= HAMMER_CURSOR_PRUNING;

		hammer_sync_lock_sh(trans);
		error = hammer_btree_last(&cursor);
		hammer_sync_unlock(trans);

		while (error == 0) {
			/*
			 * Record where we are before doing anything else.
			 */
			leaf = &cursor.node->ondisk->elms[cursor.index].leaf;
			prune->key_cur = leaf->base;

			/*
			 * Yield to more important tasks
			 */
			error = hammer_signal_check(trans->hmp);
			if (error != 0)
				break;

			if (prune->stat_oldest_tid > leaf->base.create_tid)
				prune->stat_oldest_tid = leaf->base.create_tid;

			if (hammer_debug_general & 0x0200) {
				kprintf("check %016llx %016llx cre=%016llx del=%016llx\n",
					(long long)leaf->base.obj_id,
					(long long)leaf->base.key,
					(long long)leaf->base.create_tid,
					(long long)leaf->base.delete_tid);
			}

			if (!prune_should_delete(prune, leaf)) {
				/*
				 * Keeping this element, though there may
				 * still be other things to check on it.
				 */
				prune_check_nlinks(&cursor, leaf);
				cursor.flags |= HAMMER_CURSOR_ATEDISK;
				if (hammer_debug_general & 0x0100) {
					kprintf("check %016llx %016llx: SKIP\n",
						(long long)leaf->base.obj_id,
						(long long)leaf->base.key);
				}
			} else {
				if (hammer_debug_general & 0x0200) {
					kprintf("check %016llx %016llx: DELETE\n",
						(long long)leaf->base.obj_id,
						(long long)leaf->base.key);
				}

				/*
				 * NOTE: This can return EDEADLK
				 *
				 * Holding the sync lock keeps the delete
				 * from crossing a synchronization boundary
				 * (see the flusher).
				 *
				 * Inodes and next_tid need no tracking when
				 * destroying already-deleted records.
				 */
				is_dirent = (leaf->base.rec_type ==
					     HAMMER_RECTYPE_DIRENTRY);

				hammer_sync_lock_sh(trans);
				error = hammer_delete_at_cursor(&cursor,
						HAMMER_DELETE_DESTROY,
						cursor.trans->tid,
						cursor.trans->time32,
						0, &prune->stat_bytes);
				hammer_sync_unlock(trans);
				if (error)
					break;

				if (is_dirent)
					++prune->stat_dirrecords;
				else
					++prune->stat_rawrecords;

				/*
				 * The cursor may now rest on the record
				 * following the deleted one; ATEDISK makes
				 * the reverse iteration skip it.
				 */
				cursor.flags |= HAMMER_CURSOR_ATEDISK;
			}
			++prune->stat_scanrecords;

			/*
			 * WARNING: See warnings in hammer_unlock_cursor()
			 *	    function.
			 */
			while (hammer_flusher_meta_halflimit(trans->hmp) ||
			       hammer_flusher_undo_exhausted(trans, 2)) {
				hammer_unlock_cursor(&cursor);
				hammer_flusher_wait(trans->hmp, flush_seq);
				hammer_lock_cursor(&cursor);
				flush_seq = hammer_flusher_async_one(trans->hmp);
			}

			hammer_sync_lock_sh(trans);
			error = hammer_btree_iterate_reverse(&cursor);
			hammer_sync_unlock(trans);
		}

		/* Reaching the start of the range is the normal exit. */
		if (error == ENOENT)
			error = 0;
		hammer_done_cursor(&cursor);
	} while (error == EDEADLK);

	if (error == EINTR) {
		prune->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}

out:
	/* Hand the structure back in the form userland gave it to us. */
	prune->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	prune->elms = orig_elms;
	kfree(kern_elms, M_TEMP);
	return(error);
}
/*
 * Reblock ioctl: walk the requested key range forward and hand every
 * element to hammer_reblock_helper().
 *
 * reblock->key_cur tracks the scan position so that a pass restarted after
 * EDEADLK or EWOULDBLOCK resumes from the last element seen.
 *
 * Returns EINVAL for a malformed request, ENOSPC once free space has been
 * found insufficient ten times, and otherwise the scan result.  An EINTR
 * from the scan is converted to 0 with HAMMER_IOC_HEAD_INTR set in
 * reblock->head.flags.
 */
int
hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip,
		   struct hammer_ioc_reblock *reblock)
{
	struct hammer_cursor cursor;
	hammer_btree_elm_t cur_elm;
	int nospace_hits;
	int error;
	int flush_seq;
	int slop;

	if ((reblock->key_beg.localization | reblock->key_end.localization) &
	    HAMMER_LOCALIZE_PSEUDOFS_MASK) {
		return(EINVAL);
	}
	if (reblock->key_beg.obj_id >= reblock->key_end.obj_id)
		return(EINVAL);
	if (reblock->free_level < 0)
		return(EINVAL);

	/*
	 * A fill level <= 20% is considered an emergency.  free_level is
	 * inverted from fill_level.
	 */
	slop = (reblock->free_level >= HAMMER_LARGEBLOCK_SIZE * 8 / 10) ?
		HAMMER_CHKSPC_EMERGENCY : HAMMER_CHKSPC_REBLOCK;

	reblock->key_cur = reblock->key_beg;
	reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	reblock->key_cur.localization += ip->obj_localization;

	nospace_hits = 0;
	flush_seq = trans->hmp->flusher.done;

	/*
	 * One iteration per scan pass.  A pass is repeated on EDEADLK, and
	 * on EWOULDBLOCK after a synchronous flush.
	 */
	for (;;) {
		error = hammer_init_cursor(trans, &cursor, NULL, NULL);
		if (error) {
			hammer_done_cursor(&cursor);
			goto out;
		}

		cursor.key_beg.localization = reblock->key_cur.localization;
		cursor.key_beg.obj_id = reblock->key_cur.obj_id;
		cursor.key_beg.key = HAMMER_MIN_KEY;
		cursor.key_beg.create_tid = 1;
		cursor.key_beg.delete_tid = 0;
		cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
		cursor.key_beg.obj_type = 0;

		cursor.key_end.localization =
			(reblock->key_end.localization & HAMMER_LOCALIZE_MASK) +
			ip->obj_localization;
		cursor.key_end.obj_id = reblock->key_end.obj_id;
		cursor.key_end.key = HAMMER_MAX_KEY;
		cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
		cursor.key_end.delete_tid = 0;
		cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
		cursor.key_end.obj_type = 0;

		cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
		cursor.flags |= HAMMER_CURSOR_BACKEND;
		cursor.flags |= HAMMER_CURSOR_NOSWAPCACHE;

		/*
		 * REBLOCKING makes the btree scan return internal nodes as
		 * well as leafs so they can be reblocked too.  Request it
		 * only when B-Tree nodes are to be reblocked.
		 */
		if (reblock->head.flags & HAMMER_IOC_DO_BTREE)
			cursor.flags |= HAMMER_CURSOR_REBLOCKING;

		error = hammer_btree_first(&cursor);
		while (error == 0) {
			/*
			 * Internal or Leaf node
			 */
			KKASSERT(cursor.index < cursor.node->ondisk->count);
			cur_elm = &cursor.node->ondisk->elms[cursor.index];
			reblock->key_cur.obj_id = cur_elm->base.obj_id;
			reblock->key_cur.localization =
				cur_elm->base.localization;

			/*
			 * Yield to more important tasks
			 */
			error = hammer_signal_check(trans->hmp);
			if (error != 0)
				break;

			if (hammer_checkspace(trans->hmp, slop)) {
				/*
				 * Insufficient free space may be caused by
				 * reserved bigblocks, which flushing might
				 * fix.
				 *
				 * RETEST is forced in case the unlocked
				 * cursor gets moved to the end of the leaf
				 * or to an internal node.
				 *
				 * WARNING: See warnings in
				 *	    hammer_unlock_cursor() function.
				 */
				if (++nospace_hits == 10) {
					error = ENOSPC;
					break;
				}
				hammer_unlock_cursor(&cursor);
				cursor.flags |= HAMMER_CURSOR_RETEST;
				hammer_flusher_wait(trans->hmp, flush_seq);
				hammer_lock_cursor(&cursor);
				flush_seq = hammer_flusher_async(trans->hmp,
								 NULL);
			} else {
				/*
				 * The sync lock keeps the operation from
				 * crossing a synchronization boundary.
				 *
				 * NOTE: cursor.node may have changed on
				 *	 return.
				 *
				 * WARNING: See warnings in
				 *	    hammer_unlock_cursor() function.
				 */
				hammer_sync_lock_sh(trans);
				error = hammer_reblock_helper(reblock, &cursor,
							      cur_elm);
				hammer_sync_unlock(trans);

				while (hammer_flusher_meta_halflimit(trans->hmp) ||
				       hammer_flusher_undo_exhausted(trans, 2)) {
					hammer_unlock_cursor(&cursor);
					hammer_flusher_wait(trans->hmp,
							    flush_seq);
					hammer_lock_cursor(&cursor);
					flush_seq =
					    hammer_flusher_async_one(trans->hmp);
				}

				/*
				 * Set up for iteration; other threads may
				 * have modified our cursor flags while it
				 * was unlocked.
				 */
				cursor.flags |= HAMMER_CURSOR_ATEDISK;

				/*
				 * Data buffers are allocated here without
				 * dirty-level tracking since the kernel is
				 * allowed to write them, but allocating too
				 * many can still deadlock the buffer cache.
				 *
				 * WARNING: See warnings in
				 *	    hammer_unlock_cursor() function.
				 *	    (The cursor's node and element
				 *	    may change!)
				 */
				if (bd_heatup()) {
					hammer_unlock_cursor(&cursor);
					bwillwrite(HAMMER_XBUFSIZE);
					hammer_lock_cursor(&cursor);
				}
				/* XXX vm_wait_nominal(); */
			}

			if (error == 0)
				error = hammer_btree_iterate(&cursor);
		}

		/* Running off the end of the range is the normal exit. */
		if (error == ENOENT)
			error = 0;
		hammer_done_cursor(&cursor);

		if (error == EWOULDBLOCK) {
			hammer_flusher_sync(trans->hmp);
			continue;
		}
		if (error == EDEADLK)
			continue;
		break;
	}

	if (error == EINTR) {
		reblock->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}

out:
	reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	return(error);
}
/*
 * Reblock whatever the cursor currently points at: the data referenced by
 * a leaf record element and/or the B-Tree node itself, depending on the
 * HAMMER_IOC_DO_* bits in reblock->head.flags and on the free space that
 * hammer_blockmap_getfree() reports compared to reblock->free_level.
 *
 * Statistics in *reblock (data_count, data_moves, btree_count, ...) are
 * updated along the way.
 *
 * Returns 0 or an error.  EDEADLK is returned when the element under the
 * cursor was found changed after the cursor had to be unlocked.  The
 * cursor is downgraded before returning on every path except the early
 * return for non-RECORD leaf elements.
 */
static int
hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
		      hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	hammer_mount_t hmp;
	hammer_off_t blk_offset;
	hammer_node_ondisk_t ondisk;
	struct hammer_btree_leaf_elm saved_leaf;
	int error;
	int free_bytes;
	int cur;
	int want_flags;

	error = 0;
	hmp = cursor->trans->hmp;

	/*
	 * Data pass.  Data embedded in a record is handled by the record
	 * reblock code; this pass applies only at leaf nodes and only to
	 * RECORD element types.
	 */
	if (cursor->node->ondisk->type != HAMMER_BTREE_TYPE_LEAF)
		goto btree_pass;
	if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD)
		return(0);
	blk_offset = elm->leaf.data_offset;
	if (blk_offset == 0)
		goto btree_pass;

	/*
	 * Map the record type onto the request flag that enables it.
	 *
	 * NOTE: Localization restrictions may also have been set-up, we
	 *	 can't just set the match flags willy-nilly here.
	 */
	switch (elm->leaf.base.rec_type) {
	case HAMMER_RECTYPE_INODE:
	case HAMMER_RECTYPE_SNAPSHOT:
	case HAMMER_RECTYPE_CONFIG:
		want_flags = HAMMER_IOC_DO_INODES;
		break;
	case HAMMER_RECTYPE_EXT:
	case HAMMER_RECTYPE_FIX:
	case HAMMER_RECTYPE_PFS:
	case HAMMER_RECTYPE_DIRENTRY:
		want_flags = HAMMER_IOC_DO_DIRS;
		break;
	case HAMMER_RECTYPE_DATA:
	case HAMMER_RECTYPE_DB:
		want_flags = HAMMER_IOC_DO_DATA;
		break;
	default:
		want_flags = 0;
		break;
	}
	if ((reblock->head.flags & want_flags) == 0)
		goto btree_pass;

	++reblock->data_count;
	reblock->data_byte_count += elm->leaf.data_len;
	free_bytes = hammer_blockmap_getfree(hmp, blk_offset, &cur, &error);
	if (hammer_debug_general & 0x4000)
		kprintf("D %6d/%d\n", free_bytes, reblock->free_level);

	/* Nothing to move unless the block qualifies. */
	if (error != 0 ||
	    (cur != 0 && reblock->free_level != 0) ||
	    free_bytes < reblock->free_level) {
		goto btree_pass;
	}

	/*
	 * This is nasty: the uncache code may need vnode locks, so the
	 * cursor cannot stay locked across it.  Work from a private copy
	 * of the leaf element while unlocked.
	 *
	 * WARNING: See warnings in hammer_unlock_cursor() function.
	 */
	saved_leaf = elm->leaf;
	hammer_unlock_cursor(cursor);
	hammer_io_direct_uncache(hmp, &saved_leaf);
	hammer_lock_cursor(cursor);

	/*
	 * elm may be stale or invalid now, so reload it.  cursor->node (and
	 * with it cursor->node->ondisk) may have changed as well; ondisk is
	 * only a temporary.
	 */
	ondisk = cursor->node->ondisk;
	elm = &ondisk->elms[cursor->index];
	if (cursor->flags & HAMMER_CURSOR_RETEST) {
		kprintf("hammer: debug: retest on "
			"reblocker uncache\n");
		error = EDEADLK;
	} else if (ondisk->type != HAMMER_BTREE_TYPE_LEAF ||
		   cursor->index >= ondisk->count) {
		kprintf("hammer: debug: shifted on "
			"reblocker uncache\n");
		error = EDEADLK;
	} else if (bcmp(&elm->leaf, &saved_leaf, sizeof(saved_leaf))) {
		kprintf("hammer: debug: changed on "
			"reblocker uncache\n");
		error = EDEADLK;
	}

	if (error == 0)
		error = hammer_cursor_upgrade(cursor);
	if (error == 0) {
		KKASSERT(cursor->index < ondisk->count);
		error = hammer_reblock_data(reblock, cursor, elm);
		if (error == 0) {
			++reblock->data_moves;
			reblock->data_byte_moves += elm->leaf.data_len;
		}
	}

btree_pass:
	/*
	 * Node pass, for internal and leaf nodes alike.  A leaf node is
	 * reblocked on initial entry only (element 0).  An internal node is
	 * reblocked only when entered upward from its first leaf node (also
	 * element 0); later revisits (index > 0) are ignored.
	 */
	blk_offset = cursor->node->node_offset;
	if (cursor->index != 0 || error != 0 ||
	    (reblock->head.flags & HAMMER_IOC_DO_BTREE) == 0) {
		goto done;
	}

	++reblock->btree_count;
	free_bytes = hammer_blockmap_getfree(hmp, blk_offset, &cur, &error);
	if (hammer_debug_general & 0x4000)
		kprintf("B %6d/%d\n", free_bytes, reblock->free_level);
	if (error != 0 ||
	    (cur != 0 && reblock->free_level != 0) ||
	    free_bytes < reblock->free_level) {
		goto done;
	}

	error = hammer_cursor_upgrade(cursor);
	if (error == 0) {
		/* Hand the parent's element for this node to the mover. */
		if (cursor->parent) {
			KKASSERT(cursor->parent_index <
				 cursor->parent->ondisk->count);
			elm = &cursor->parent->ondisk->elms[cursor->parent_index];
		} else {
			elm = NULL;
		}
		switch(cursor->node->ondisk->type) {
		case HAMMER_BTREE_TYPE_LEAF:
			error = hammer_reblock_leaf_node(reblock, cursor, elm);
			break;
		case HAMMER_BTREE_TYPE_INTERNAL:
			error = hammer_reblock_int_node(reblock, cursor, elm);
			break;
		default:
			panic("Illegal B-Tree node type");
		}
	}
	if (error == 0)
		++reblock->btree_moves;

done:
	hammer_cursor_downgrade(cursor);
	return(error);
}
/*
 * Reblock the B-Tree (leaf) node, record, and/or data if necessary.
 *
 * XXX We have no visibility into internal B-Tree nodes at the moment,
 * only leaf nodes.
 *
 * reblock - ioctl request; its head.flags select what gets reblocked and
 *	     its statistics counters are updated here.
 * cursor  - cursor positioned at the element to process.
 * elm     - the element at the cursor on entry.
 *
 * Returns 0 or an error.  EDEADLK is returned if the element under the
 * cursor is found to have been retested, shifted or modified while the
 * cursor was temporarily unlocked.
 */
static int
hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
		      hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	hammer_mount_t hmp;
	hammer_off_t tmp_offset;
	struct hammer_btree_leaf_elm leaf;
	int error;
	int bytes;
	int cur;
	int iocflags;

	error = 0;
	hmp = cursor->trans->hmp;

	/*
	 * Reblock data.  Note that data embedded in a record is reblocked
	 * by the record reblock code.  Data processing only occurs at leaf
	 * nodes and for RECORD element types.
	 */
	if (cursor->node->ondisk->type != HAMMER_BTREE_TYPE_LEAF)
		goto skip;
	if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD)
		return(0);
	tmp_offset = elm->leaf.data_offset;
	if (tmp_offset == 0)
		goto skip;

	/*
	 * NOTE: Localization restrictions may also have been set-up, we can't
	 * just set the match flags willy-nilly here.
	 */
	switch(elm->leaf.base.rec_type) {
	case HAMMER_RECTYPE_INODE:
		iocflags = HAMMER_IOC_DO_INODES;
		break;
	case HAMMER_RECTYPE_EXT:
	case HAMMER_RECTYPE_FIX:
	case HAMMER_RECTYPE_PFS:
	case HAMMER_RECTYPE_DIRENTRY:
		iocflags = HAMMER_IOC_DO_DIRS;
		break;
	case HAMMER_RECTYPE_DATA:
	case HAMMER_RECTYPE_DB:
		iocflags = HAMMER_IOC_DO_DATA;
		break;
	default:
		iocflags = 0;
		break;
	}
	if (reblock->head.flags & iocflags) {
		++reblock->data_count;
		reblock->data_byte_count += elm->leaf.data_len;
		bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
		if (hammer_debug_general & 0x4000)
			kprintf("D %6d/%d\n", bytes, reblock->free_level);
		if (error == 0 && (cur == 0 || reblock->free_level == 0) &&
		    bytes >= reblock->free_level) {
			/*
			 * This is nasty, the uncache code may have to get
			 * vnode locks and because of that we can't hold
			 * the cursor locked.  Operate on a private copy of
			 * the leaf element while the cursor is unlocked.
			 */
			leaf = elm->leaf;
			hammer_unlock_cursor(cursor, 0);
			hammer_io_direct_uncache(hmp, &leaf);
			hammer_lock_cursor(cursor, 0);

			/*
			 * The element pointer taken before the unlock may
			 * be stale: reload it from the cursor's current
			 * node and index.  If the cursor was flagged for
			 * retest, no longer sits on a valid leaf element, or
			 * the element no longer matches our copy, return
			 * EDEADLK instead of asserting and taking down the
			 * kernel.
			 */
			elm = &cursor->node->ondisk->elms[cursor->index];
			if (cursor->flags & HAMMER_CURSOR_RETEST) {
				kprintf("hammer: retest after uncache\n");
				error = EDEADLK;
			} else if (cursor->node->ondisk->type !=
				   HAMMER_BTREE_TYPE_LEAF ||
				   cursor->index >=
				   cursor->node->ondisk->count) {
				kprintf("hammer: shifted after uncache\n");
				error = EDEADLK;
			} else if (bcmp(&elm->leaf, &leaf, sizeof(leaf))) {
				kprintf("hammer: changed after uncache\n");
				error = EDEADLK;
			}
			if (error == 0)
				error = hammer_cursor_upgrade(cursor);
			if (error == 0) {
				error = hammer_reblock_data(reblock,
							    cursor, elm);
			}
			if (error == 0) {
				++reblock->data_moves;
				reblock->data_byte_moves += elm->leaf.data_len;
			}
		}
	}

skip:
	/*
	 * Reblock a B-Tree internal or leaf node.  Only done when the
	 * cursor is at element 0 of the node, no error is pending and
	 * B-Tree reblocking was requested.
	 */
	tmp_offset = cursor->node->node_offset;
	if (cursor->index == 0 &&
	    error == 0 && (reblock->head.flags & HAMMER_IOC_DO_BTREE)) {
		++reblock->btree_count;
		bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
		if (hammer_debug_general & 0x4000)
			kprintf("B %6d/%d\n", bytes, reblock->free_level);
		if (error == 0 && (cur == 0 || reblock->free_level == 0) &&
		    bytes >= reblock->free_level) {
			error = hammer_cursor_upgrade(cursor);
			if (error == 0) {
				/* Pass along the parent's element, if any. */
				if (cursor->parent)
					elm = &cursor->parent->ondisk->elms[cursor->parent_index];
				else
					elm = NULL;
				switch(cursor->node->ondisk->type) {
				case HAMMER_BTREE_TYPE_LEAF:
					error = hammer_reblock_leaf_node(
							reblock, cursor, elm);
					break;
				case HAMMER_BTREE_TYPE_INTERNAL:
					error = hammer_reblock_int_node(
							reblock, cursor, elm);
					break;
				default:
					panic("Illegal B-Tree node type");
				}
			}
			if (error == 0) {
				++reblock->btree_moves;
			}
		}
	}

	hammer_cursor_downgrade(cursor);
	return(error);
}