/*
 * Helper: roll a B-Tree element back to the state it had just prior to
 * trunc_tid.
 *
 * A record created at or after trunc_tid is physically destroyed.  A
 * record deleted at or after trunc_tid has its delete_tid cleared,
 * undeleting it.  Records entirely older than trunc_tid are untouched.
 */
static int
hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid)
{
	hammer_btree_leaf_elm_t leaf;

	leaf = &cursor->node->ondisk->elms[cursor->index].leaf;

	/*
	 * Created at/after the rollback point - destroy the record
	 * outright.
	 */
	if (leaf->base.create_tid >= trunc_tid) {
		return(hammer_delete_at_cursor(cursor,
					       HAMMER_DELETE_DESTROY,
					       cursor->trans->tid,
					       cursor->trans->time32,
					       1, NULL));
	}

	/*
	 * Deleted at/after the rollback point - clear delete_tid,
	 * undeleting the record.
	 */
	if (leaf->base.delete_tid >= trunc_tid) {
		return(hammer_delete_at_cursor(cursor,
					       HAMMER_DELETE_ADJUST,
					       0, 0, 1, NULL));
	}

	/*
	 * Both TIDs predate the rollback point, nothing to do.
	 */
	return(0);
}
/*
 * As part of the mirror write we iterate across swaths of records
 * on the target which no longer exist on the source, and mark them
 * deleted.
 *
 * The caller has indexed the cursor and set up key_end.  We iterate
 * through to key_end.
 *
 * There is an edge case where the master has deleted a record whose
 * create_tid exactly matches our end_tid.  We cannot delete this
 * record on the slave yet because we cannot assign delete_tid == create_tid.
 * The deletion should be picked up on the next sequence since in order
 * to have been deleted on the master a transaction must have occurred with
 * a TID greater than the create_tid of the record.
 *
 * To support incremental re-mirroring, just for robustness, we do not
 * touch any records created beyond (or equal to) mirror->tid_end.
 */
static int
hammer_mirror_delete_to(hammer_cursor_t cursor,
		       struct hammer_ioc_mirror_rw *mirror)
{
	hammer_btree_leaf_elm_t elm;
	int error;

	error = hammer_btree_iterate(cursor);
	while (error == 0) {
		elm = &cursor->node->ondisk->elms[cursor->index].leaf;
		KKASSERT(elm->base.btype == HAMMER_BTREE_TYPE_RECORD);
		/* mark the current element consumed so iterate advances */
		cursor->flags |= HAMMER_CURSOR_ATEDISK;

		/*
		 * Certain records are not part of the mirroring operation
		 */
		if (hammer_mirror_nomirror(&elm->base)) {
			error = hammer_btree_iterate(cursor);
			continue;
		}

		/*
		 * Note: Must still delete records with create_tid < tid_beg,
		 *	 as record may have been pruned-away on source.
		 *
		 * Only live records (delete_tid == 0) created before
		 * tid_end are marked deleted; see the header comment for
		 * why records at/beyond tid_end are left alone.
		 */
		if (elm->base.delete_tid == 0 &&
		    elm->base.create_tid < mirror->tid_end) {
			error = hammer_delete_at_cursor(cursor,
							HAMMER_DELETE_ADJUST,
							mirror->tid_end,
							time_second,
							1, NULL);
		}
		if (error == 0)
			error = hammer_btree_iterate(cursor);
	}
	/* running off the end of the range is the normal termination */
	if (error == ENOENT)
		error = 0;
	return(error);
}
/*
 * Update a record in-place on the mirror target.  The only field that
 * may change is delete_tid, and only from zero to non-zero.
 */
static int
hammer_mirror_update(hammer_cursor_t cursor,
		     struct hammer_ioc_mrecord_rec *mrec)
{
	int rv = 0;

	/*
	 * A zero delete_tid shouldn't occur; there would be nothing to
	 * update.  Only when the source record carries a deletion do we
	 * mark the record deleted on the mirror target.
	 */
	if (mrec->leaf.base.delete_tid != 0) {
		rv = hammer_delete_at_cursor(cursor, HAMMER_DELETE_ADJUST,
					     mrec->leaf.base.delete_tid,
					     mrec->leaf.delete_ts,
					     1, NULL);
		cursor->flags |= HAMMER_CURSOR_ATEDISK;
	}
	return(rv);
}
/*
 * Set the PFS hammer cleanup utility config record.  This is
 * different (newer than) the PFS config.
 *
 * The old CONFIG record (if any) is destroyed and a fresh one is
 * created in its place.  Errors are reported through
 * config->head.error; the ioctl itself always returns 0.
 *
 * This is kinda a hack.
 */
static int
hammer_ioc_set_config(hammer_transaction_t trans, hammer_inode_t ip,
		    struct hammer_ioc_config *config)
{
	struct hammer_btree_leaf_elm leaf;
	struct hammer_cursor cursor;
	hammer_mount_t hmp = ip->hmp;
	int error;

again:
	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		return(error);
	}

	/*
	 * Construct the leaf describing the CONFIG record we are about
	 * to (re)create.  Its base is also used as the lookup key.
	 */
	bzero(&leaf, sizeof(leaf));
	leaf.base.obj_id = HAMMER_OBJID_ROOT;
	leaf.base.rec_type = HAMMER_RECTYPE_CONFIG;
	leaf.base.create_tid = hammer_alloc_tid(hmp, 1);
	leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
	leaf.base.localization = ip->obj_localization | HAMMER_LOCALIZE_INODE;
	leaf.base.key = 0;	/* page 0 */
	leaf.data_len = sizeof(struct hammer_config_data);

	cursor.key_beg = leaf.base;
	cursor.asof = HAMMER_MAX_TID;
	cursor.flags |= HAMMER_CURSOR_BACKEND | HAMMER_CURSOR_ASOF;

	/*
	 * Destroy any pre-existing CONFIG record so the new one can be
	 * created in its place.
	 *
	 * NOTE(review): the return value of hammer_btree_extract_data()
	 * is immediately overwritten by the delete below — apparently
	 * intentional (the destroy proceeds regardless), but confirm.
	 */
	error = hammer_btree_lookup(&cursor);
	if (error == 0) {
		error = hammer_btree_extract_data(&cursor);
		error = hammer_delete_at_cursor(&cursor, HAMMER_DELETE_DESTROY,
						0, 0, 0, NULL);
		if (error == EDEADLK) {
			hammer_done_cursor(&cursor);
			goto again;
		}
	}
	/* no prior record is fine, we are creating a new one */
	if (error == ENOENT)
		error = 0;
	if (error == 0) {
		/*
		 * NOTE: Must reload key_beg after an ASOF search because
		 *	 the create_tid may have been modified during the
		 *	 search.
		 */
		cursor.flags &= ~HAMMER_CURSOR_ASOF;
		cursor.key_beg = leaf.base;
		error = hammer_create_at_cursor(&cursor, &leaf,
						&config->config,
						HAMMER_CREATE_MODE_SYS);
		if (error == EDEADLK) {
			hammer_done_cursor(&cursor);
			goto again;
		}
	}
	/* errors are reported through the ioctl header, not the return */
	config->head.error = error;
	hammer_done_cursor(&cursor);
	return(0);
}
/*
 * Delete snapshot transaction id(s) from the list of snapshots.
 *
 * Each requested TID is looked up as a SNAPSHOT record under the PFS
 * root and physically destroyed.  Progress is tracked via snap->index
 * so an EDEADLK retry resumes where it left off.  Errors are reported
 * through snap->head.error; the ioctl itself always returns 0 once
 * past validation.
 */
static int
hammer_ioc_del_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_snapshot *snap)
{
	hammer_mount_t hmp = ip->hmp;
	struct hammer_cursor cursor;
	int error;

	/*
	 * Validate structure
	 */
	if (snap->count > HAMMER_SNAPS_PER_IOCTL)
		return (EINVAL);
	if (snap->index >= snap->count)
		return (EINVAL);

	hammer_lock_ex(&hmp->snapshot_lock);
again:
	/*
	 * Look for keys starting after the previous iteration, or at
	 * the beginning if snap->count is 0.
	 *
	 * NOTE(review): the guard above rejects index >= count, so the
	 * "snap->count is 0" wording looks stale — presumably it means
	 * snap->index; confirm against callers.
	 */
	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		return(error);
	}

	/* SNAPSHOT meta-records live under the PFS root object */
	cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_SNAPSHOT;
	cursor.key_beg.localization = ip->obj_localization |
				      HAMMER_LOCALIZE_INODE;
	cursor.asof = HAMMER_MAX_TID;
	cursor.flags |= HAMMER_CURSOR_ASOF;

	while (snap->index < snap->count) {
		/* the snapshot TID doubles as the record key */
		cursor.key_beg.key = (int64_t)snap->snaps[snap->index].tid;
		error = hammer_btree_lookup(&cursor);
		if (error)
			break;
		error = hammer_btree_extract_leaf(&cursor);
		if (error)
			break;
		error = hammer_delete_at_cursor(&cursor,
						HAMMER_DELETE_DESTROY,
						0, 0, 0, NULL);
		/* on deadlock re-acquire the cursor and resume at index */
		if (error == EDEADLK) {
			hammer_done_cursor(&cursor);
			goto again;
		}
		if (error)
			break;
		++snap->index;
	}
	snap->head.error = error;
	hammer_done_cursor(&cursor);
	hammer_unlock(&hmp->snapshot_lock);
	return(0);
}
/*
 * Prune records within the requested key range according to the
 * user-supplied element array (or everything, with HAMMER_IOC_PRUNE_ALL).
 *
 * The B-Tree is scanned in reverse from key_end down to key_beg.
 * Statistics and the resume point (key_cur) are maintained in *prune.
 * Returns 0 on success or a kernel errno; EDEADLK is retried
 * internally and EINTR is converted to a flag in the ioctl header.
 */
int
hammer_ioc_prune(hammer_transaction_t trans, hammer_inode_t ip,
		struct hammer_ioc_prune *prune)
{
	struct hammer_cursor cursor;
	hammer_btree_leaf_elm_t elm;
	struct hammer_ioc_prune_elm *copy_elms;
	struct hammer_ioc_prune_elm *user_elms;
	int error;
	int isdir;
	int elm_array_size;
	int seq;

	/*
	 * Validate the user-supplied structure before touching anything.
	 */
	if (prune->nelms < 0 || prune->nelms > HAMMER_MAX_PRUNE_ELMS)
		return(EINVAL);
	if ((prune->key_beg.localization | prune->key_end.localization) &
	    HAMMER_LOCALIZE_PSEUDOFS_MASK) {
		return(EINVAL);
	}
	if (prune->key_beg.localization > prune->key_end.localization)
		return(EINVAL);
	if (prune->key_beg.localization == prune->key_end.localization) {
		if (prune->key_beg.obj_id > prune->key_end.obj_id)
			return(EINVAL);
		/* key-space limitations - no check needed */
	}
	/* PRUNE_ALL and an explicit element list are mutually exclusive */
	if ((prune->head.flags & HAMMER_IOC_PRUNE_ALL) && prune->nelms)
		return(EINVAL);		/* 22 EINVAL */

	/*
	 * key_cur tracks the scan position (we iterate backwards, so it
	 * starts at the end of the range).
	 */
	prune->key_cur.localization = (prune->key_end.localization &
				       HAMMER_LOCALIZE_MASK) +
				      ip->obj_localization;
	prune->key_cur.obj_id = prune->key_end.obj_id;
	prune->key_cur.key = HAMMER_MAX_KEY;

	/*
	 * Copy element array from userland
	 */
	elm_array_size = sizeof(*copy_elms) * prune->nelms;
	user_elms = prune->elms;
	copy_elms = kmalloc(elm_array_size, M_TEMP, M_WAITOK);
	if ((error = copyin(user_elms, copy_elms, elm_array_size)) != 0)
		goto failed;
	/* temporarily point prune at the kernel copy; restored at exit */
	prune->elms = copy_elms;

	seq = trans->hmp->flusher.done;

	/*
	 * Scan backwards.  Retries typically occur if a deadlock is
	 * detected.
	 */
retry:
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		goto failed;
	}
	cursor.key_beg.localization = (prune->key_beg.localization &
				       HAMMER_LOCALIZE_MASK) +
				      ip->obj_localization;
	cursor.key_beg.obj_id = prune->key_beg.obj_id;
	cursor.key_beg.key = HAMMER_MIN_KEY;
	cursor.key_beg.create_tid = 1;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
	cursor.key_beg.obj_type = 0;

	/* resume from key_cur so an EDEADLK retry does not rescan */
	cursor.key_end.localization = prune->key_cur.localization;
	cursor.key_end.obj_id = prune->key_cur.obj_id;
	cursor.key_end.key = prune->key_cur.key;
	cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
	cursor.key_end.delete_tid = 0;
	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
	cursor.key_end.obj_type = 0;

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
	cursor.flags |= HAMMER_CURSOR_BACKEND;

	/*
	 * This flag allows the B-Tree code to clean up loose ends.  At
	 * the moment (XXX) it also means we have to hold the sync lock
	 * through the iteration.
	 */
	cursor.flags |= HAMMER_CURSOR_PRUNING;

	hammer_sync_lock_sh(trans);
	error = hammer_btree_last(&cursor);
	hammer_sync_unlock(trans);

	while (error == 0) {
		/*
		 * Check for work
		 */
		elm = &cursor.node->ondisk->elms[cursor.index].leaf;
		prune->key_cur = elm->base;

		/*
		 * Yield to more important tasks
		 */
		if ((error = hammer_signal_check(trans->hmp)) != 0)
			break;

		if (prune->stat_oldest_tid > elm->base.create_tid)
			prune->stat_oldest_tid = elm->base.create_tid;

		if (hammer_debug_general & 0x0200) {
			kprintf("check %016llx %016llx cre=%016llx del=%016llx\n",
				(long long)elm->base.obj_id,
				(long long)elm->base.key,
				(long long)elm->base.create_tid,
				(long long)elm->base.delete_tid);
		}

		if (prune_should_delete(prune, elm)) {
			if (hammer_debug_general & 0x0200) {
				kprintf("check %016llx %016llx: DELETE\n",
					(long long)elm->base.obj_id,
					(long long)elm->base.key);
			}

			/*
			 * NOTE: This can return EDEADLK
			 *
			 * Acquiring the sync lock guarantees that the
			 * operation will not cross a synchronization
			 * boundary (see the flusher).
			 *
			 * We dont need to track inodes or next_tid when
			 * we are destroying deleted records.
			 */
			isdir = (elm->base.rec_type == HAMMER_RECTYPE_DIRENTRY);

			hammer_sync_lock_sh(trans);
			error = hammer_delete_at_cursor(&cursor,
							HAMMER_DELETE_DESTROY,
							cursor.trans->tid,
							cursor.trans->time32,
							0, &prune->stat_bytes);
			hammer_sync_unlock(trans);
			if (error)
				break;

			if (isdir)
				++prune->stat_dirrecords;
			else
				++prune->stat_rawrecords;

			/*
			 * The current record might now be the one after
			 * the one we deleted, set ATEDISK to force us
			 * to skip it (since we are iterating backwards).
			 */
			cursor.flags |= HAMMER_CURSOR_ATEDISK;
		} else {
			/*
			 * Nothing to delete, but we may have to check other
			 * things.
			 */
			prune_check_nlinks(&cursor, elm);
			cursor.flags |= HAMMER_CURSOR_ATEDISK;
			if (hammer_debug_general & 0x0100) {
				kprintf("check %016llx %016llx: SKIP\n",
					(long long)elm->base.obj_id,
					(long long)elm->base.key);
			}
		}
		++prune->stat_scanrecords;

		/*
		 * WARNING: See warnings in hammer_unlock_cursor()
		 * function.
		 *
		 * Throttle against the flusher so we do not exhaust
		 * meta-data buffers or the UNDO FIFO.
		 */
		while (hammer_flusher_meta_halflimit(trans->hmp) ||
		       hammer_flusher_undo_exhausted(trans, 2)) {
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async_one(trans->hmp);
		}
		hammer_sync_lock_sh(trans);
		error = hammer_btree_iterate_reverse(&cursor);
		hammer_sync_unlock(trans);
	}
	/* running off the front of the range is the normal termination */
	if (error == ENOENT)
		error = 0;
	hammer_done_cursor(&cursor);
	if (error == EDEADLK)
		goto retry;
	if (error == EINTR) {
		prune->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}
failed:
	/* restore userland-visible fields and release the kernel copy */
	prune->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	prune->elms = user_elms;
	kfree(copy_elms, M_TEMP);
	return(error);
}