static int
hammer_ioc_get_data(hammer_transaction_t trans, hammer_inode_t ip,
                    struct hammer_ioc_data *data)
{
        struct hammer_cursor cursor;
        int bytes;
        int error;

        /* XXX cached inode ? */
        error = hammer_init_cursor(trans, &cursor, NULL, NULL);
        if (error)
                goto failed;

        cursor.key_beg = data->elm;
        cursor.flags |= HAMMER_CURSOR_BACKEND;

        error = hammer_btree_lookup(&cursor);
        if (error == 0) {
                error = hammer_btree_extract_data(&cursor);
                if (error == 0) {
                        data->leaf = *cursor.leaf;
                        bytes = cursor.leaf->data_len;
                        if (bytes > data->size)
                                bytes = data->size;
                        error = copyout(cursor.data, data->ubuf, bytes);
                }
        }

failed:
        hammer_done_cursor(&cursor);
        return (error);
}
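/*
 * Example (sketch): fetching a record's data payload from userland.
 * This is a hedged illustration, not code from hammer(8); it assumes
 * HAMMERIOC_GET_DATA is the ioctl number bound to this handler, with
 * fd an open descriptor on the filesystem and elm a hypothetical
 * B-Tree base key obtained from a prior scan.
 *
 *      struct hammer_ioc_data data;
 *      char buf[HAMMER_XBUFSIZE];
 *
 *      bzero(&data, sizeof(data));
 *      data.elm = elm;                 // key to look up
 *      data.ubuf = buf;                // user buffer for the payload
 *      data.size = sizeof(buf);        // payload is clipped to this
 *      if (ioctl(fd, HAMMERIOC_GET_DATA, &data) < 0)
 *              err(1, "HAMMERIOC_GET_DATA");
 *      // on success data.leaf holds the leaf element, buf the data
 */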
/*
 * This works like write_rec but no write or update is necessary,
 * and no data payload is included so we couldn't do a write even
 * if we wanted to.
 *
 * We must still iterate for deletions, and we can validate the
 * record header which is a good way to test for corrupted mirror
 * targets XXX.
 *
 * mirror->key_cur must be carefully set when we succeed in processing
 * this mrec.
 */
static int
hammer_ioc_mirror_write_pass(hammer_cursor_t cursor,
                             struct hammer_ioc_mrecord_rec *mrec,
                             struct hammer_ioc_mirror_rw *mirror,
                             u_int32_t localization)
{
        int error;

        /*
         * Re-localize for target.  Relocalization of data is handled
         * by hammer_mirror_write().
         */
        mrec->leaf.base.localization &= HAMMER_LOCALIZE_MASK;
        mrec->leaf.base.localization += localization;

        /*
         * Delete records through until we reach (non-inclusively) the
         * target record.
         */
        cursor->key_end = mrec->leaf.base;
        cursor->flags &= ~HAMMER_CURSOR_END_INCLUSIVE;
        cursor->flags |= HAMMER_CURSOR_BACKEND;
        error = hammer_mirror_delete_to(cursor, mirror);

        /*
         * Certain records are not part of the mirroring operation
         */
        if (hammer_mirror_nomirror(&mrec->leaf.base))
                return(0);

        /*
         * Locate the record and get past it by setting ATEDISK.  Perform
         * any necessary deletions.  We have no data payload and cannot
         * create a new record.
         */
        if (error == 0) {
                mirror->key_cur = mrec->leaf.base;
                cursor->key_beg = mrec->leaf.base;
                cursor->flags |= HAMMER_CURSOR_BACKEND;
                cursor->flags &= ~HAMMER_CURSOR_INSERT;
                error = hammer_btree_lookup(cursor);
                if (error == 0) {
                        if (hammer_mirror_check(cursor, mrec))
                                error = hammer_mirror_update(cursor, mrec);
                        cursor->flags |= HAMMER_CURSOR_ATEDISK;
                } else {
                        cursor->flags &= ~HAMMER_CURSOR_ATEDISK;
                }
                if (error == ENOENT)
                        error = 0;
        }
        return(error);
}
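/*
 * Worked example of the re-localization above, assuming the usual
 * HAMMER layout: the PFS id occupies the upper 16 bits of the
 * localization field and HAMMER_LOCALIZE_MASK covers the lower 16.
 *
 *      incoming record: localization = 0x00050001 (PFS #5, INODE space)
 *      target argument: localization = 0x00030000 (PFS #3)
 *
 *      0x00050001 & HAMMER_LOCALIZE_MASK -> 0x00000001
 *      0x00000001 + 0x00030000           -> 0x00030001 (PFS #3, INODE)
 */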
/*
 * Handle skip records.
 *
 * We must iterate from the last resolved record position at mirror->key_cur
 * to skip_beg non-inclusive and delete any records encountered.
 *
 * mirror->key_cur must be carefully set when we succeed in processing
 * this mrec.
 */
static int
hammer_ioc_mirror_write_skip(hammer_cursor_t cursor,
                             struct hammer_ioc_mrecord_skip *mrec,
                             struct hammer_ioc_mirror_rw *mirror,
                             u_int32_t localization)
{
        int error;

        /*
         * Relocalize the skip range
         */
        mrec->skip_beg.localization &= HAMMER_LOCALIZE_MASK;
        mrec->skip_beg.localization += localization;
        mrec->skip_end.localization &= HAMMER_LOCALIZE_MASK;
        mrec->skip_end.localization += localization;

        /*
         * Iterate from current position to skip_beg, deleting any records
         * we encounter.  The record at skip_beg is not included (it is
         * skipped).
         */
        cursor->key_end = mrec->skip_beg;
        cursor->flags &= ~HAMMER_CURSOR_END_INCLUSIVE;
        cursor->flags |= HAMMER_CURSOR_BACKEND;
        error = hammer_mirror_delete_to(cursor, mirror);

        /*
         * Now skip past the skip (which is the whole point of having a
         * skip record).  The sender has not sent us any records for the
         * skip area so we wouldn't know what to keep and what to delete
         * anyway.
         *
         * Clear ATEDISK because skip_end is non-inclusive, so we can't
         * count an exact match if we happened to get one.
         */
        if (error == 0) {
                mirror->key_cur = mrec->skip_end;
                cursor->key_beg = mrec->skip_end;
                error = hammer_btree_lookup(cursor);
                cursor->flags &= ~HAMMER_CURSOR_ATEDISK;
                if (error == ENOENT)
                        error = 0;
        }
        return(error);
}
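/*
 * Key-space picture for a skip record (illustrative):
 *
 *      key_cur              skip_beg               skip_end
 *         |---- deleted -------)|----- untouched -----)|
 *
 * [key_cur, skip_beg) is swept by hammer_mirror_delete_to() and
 * [skip_beg, skip_end) is left alone; key_cur then resumes at skip_end.
 */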
/*
 * Retrieve the PFS hammer cleanup utility config record.  This is
 * different from (and newer than) the PFS config.
 */
static int
hammer_ioc_get_config(hammer_transaction_t trans, hammer_inode_t ip,
                      struct hammer_ioc_config *config)
{
        struct hammer_cursor cursor;
        int error;

        error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
        if (error) {
                hammer_done_cursor(&cursor);
                return(error);
        }

        cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
        cursor.key_beg.create_tid = 0;
        cursor.key_beg.delete_tid = 0;
        cursor.key_beg.obj_type = 0;
        cursor.key_beg.rec_type = HAMMER_RECTYPE_CONFIG;
        cursor.key_beg.localization = ip->obj_localization |
                                      HAMMER_LOCALIZE_INODE;
        cursor.key_beg.key = 0;         /* config space page 0 */

        cursor.asof = HAMMER_MAX_TID;
        cursor.flags |= HAMMER_CURSOR_ASOF;

        error = hammer_btree_lookup(&cursor);
        if (error == 0) {
                error = hammer_btree_extract_data(&cursor);
                if (error == 0)
                        config->config = cursor.data->config;
        }
        /* error can be ENOENT */
        config->head.error = error;
        hammer_done_cursor(&cursor);
        return(0);
}
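/*
 * Example (sketch): reading the config record from userland.  Hedged
 * illustration; it assumes HAMMERIOC_GET_CONFIG is the ioctl bound to
 * this handler.  Note that the handler returns 0 and reports lookup
 * failures via config.head.error, so both must be checked:
 *
 *      struct hammer_ioc_config config;
 *
 *      bzero(&config, sizeof(config));
 *      if (ioctl(fd, HAMMERIOC_GET_CONFIG, &config) < 0)
 *              err(1, "HAMMERIOC_GET_CONFIG");
 *      if (config.head.error == ENOENT)
 *              printf("no config record\n");
 *      else if (config.head.error)
 *              errx(1, "config lookup: %d", config.head.error);
 */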
/*
 * Write out a new record.
 */
static int
hammer_mirror_write(hammer_cursor_t cursor,
                    struct hammer_ioc_mrecord_rec *mrec,
                    char *udata)
{
        hammer_transaction_t trans;
        hammer_buffer_t data_buffer;
        hammer_off_t ndata_offset;
        hammer_tid_t high_tid;
        void *ndata;
        int error;
        int doprop;

        trans = cursor->trans;
        data_buffer = NULL;

        /*
         * Get the sync lock so the whole mess is atomic
         */
        hammer_sync_lock_sh(trans);

        /*
         * Allocate and adjust data
         */
        if (mrec->leaf.data_len && mrec->leaf.data_offset) {
                ndata = hammer_alloc_data(trans, mrec->leaf.data_len,
                                          mrec->leaf.base.rec_type,
                                          &ndata_offset, &data_buffer,
                                          0, &error);
                if (ndata == NULL) {
                        /* release the sync lock acquired above */
                        hammer_sync_unlock(trans);
                        return(error);
                }
                mrec->leaf.data_offset = ndata_offset;
                hammer_modify_buffer(trans, data_buffer, NULL, 0);
                error = copyin(udata, ndata, mrec->leaf.data_len);
                if (error == 0) {
                        if (hammer_crc_test_leaf(ndata, &mrec->leaf) == 0) {
                                kprintf("data crc mismatch on pipe\n");
                                error = EINVAL;
                        } else {
                                error = hammer_mirror_localize_data(
                                                ndata, &mrec->leaf);
                        }
                }
                hammer_modify_buffer_done(data_buffer);
        } else {
                mrec->leaf.data_offset = 0;
                error = 0;
                ndata = NULL;
        }
        if (error)
                goto failed;

        /*
         * Do the insertion.  This can fail with an EDEADLK or EALREADY.
         */
        cursor->flags |= HAMMER_CURSOR_INSERT;
        error = hammer_btree_lookup(cursor);
        if (error != ENOENT) {
                if (error == 0)
                        error = EALREADY;
                goto failed;
        }

        error = hammer_btree_insert(cursor, &mrec->leaf, &doprop);

        /*
         * Cursor is left on the current element, we want to skip it now.
         */
        cursor->flags |= HAMMER_CURSOR_ATEDISK;
        cursor->flags &= ~HAMMER_CURSOR_INSERT;

        /*
         * Track a count of active inodes.
         */
        if (error == 0 &&
            mrec->leaf.base.rec_type == HAMMER_RECTYPE_INODE &&
            mrec->leaf.base.delete_tid == 0) {
                hammer_modify_volume_field(trans, trans->rootvol,
                                           vol0_stat_inodes);
                ++trans->hmp->rootvol->ondisk->vol0_stat_inodes;
                hammer_modify_volume_done(trans->rootvol);
        }

        /*
         * vol0_next_tid must track the highest TID stored in the
         * filesystem.  We do not need to generate undo for this update.
         */
        high_tid = mrec->leaf.base.create_tid;
        if (high_tid < mrec->leaf.base.delete_tid)
                high_tid = mrec->leaf.base.delete_tid;
        if (trans->rootvol->ondisk->vol0_next_tid < high_tid) {
                hammer_modify_volume(trans, trans->rootvol, NULL, 0);
                trans->rootvol->ondisk->vol0_next_tid = high_tid;
                hammer_modify_volume_done(trans->rootvol);
        }

        /*
         * WARNING!  cursor's leaf pointer may have changed after
         * hammer_btree_do_propagation() returns.
         */
        if (error == 0 && doprop)
                hammer_btree_do_propagation(cursor, NULL, &mrec->leaf);

failed:
        /*
         * Cleanup
         */
        if (error && mrec->leaf.data_offset) {
                hammer_blockmap_free(cursor->trans,
                                     mrec->leaf.data_offset,
                                     mrec->leaf.data_len);
        }
        hammer_sync_unlock(trans);
        if (data_buffer)
                hammer_rel_buffer(data_buffer, 0);
        return(error);
}
/*
 * Handle B-Tree records.
 *
 * We must iterate to mrec->base.key (non-inclusively), and then process
 * the record.  We are allowed to write a new record or delete an existing
 * record, but cannot replace an existing record.
 *
 * mirror->key_cur must be carefully set when we succeed in processing
 * this mrec.
 */
static int
hammer_ioc_mirror_write_rec(hammer_cursor_t cursor,
                            struct hammer_ioc_mrecord_rec *mrec,
                            struct hammer_ioc_mirror_rw *mirror,
                            u_int32_t localization,
                            char *uptr)
{
        int error;

        if (mrec->leaf.data_len < 0 ||
            mrec->leaf.data_len > HAMMER_XBUFSIZE ||
            mrec->leaf.data_len + sizeof(*mrec) > mrec->head.rec_size) {
                return(EINVAL);
        }

        /*
         * Re-localize for target.  Relocalization of data is handled
         * by hammer_mirror_write().
         */
        mrec->leaf.base.localization &= HAMMER_LOCALIZE_MASK;
        mrec->leaf.base.localization += localization;

        /*
         * Delete records through until we reach (non-inclusively) the
         * target record.
         */
        cursor->key_end = mrec->leaf.base;
        cursor->flags &= ~HAMMER_CURSOR_END_INCLUSIVE;
        cursor->flags |= HAMMER_CURSOR_BACKEND;
        error = hammer_mirror_delete_to(cursor, mirror);

        /*
         * Certain records are not part of the mirroring operation
         */
        if (error == 0 && hammer_mirror_nomirror(&mrec->leaf.base))
                return(0);

        /*
         * Locate the record.
         *
         * If the record exists only the delete_tid may be updated.
         *
         * If the record does not exist we can create it only if the
         * create_tid is not too old.  If the create_tid is too old
         * it may have already been destroyed on the slave from pruning.
         *
         * Note that mirror operations are effectively as-of operations
         * and delete_tid can be 0 for mirroring purposes even if it is
         * not actually 0 at the originator.
         *
         * These functions can return EDEADLK
         */
        if (error == 0) {
                cursor->key_beg = mrec->leaf.base;
                cursor->flags |= HAMMER_CURSOR_BACKEND;
                cursor->flags &= ~HAMMER_CURSOR_INSERT;
                error = hammer_btree_lookup(cursor);
        }

        if (error == 0 && hammer_mirror_check(cursor, mrec)) {
                error = hammer_mirror_update(cursor, mrec);
        } else if (error == ENOENT) {
                if (mrec->leaf.base.create_tid >= mirror->tid_beg) {
                        error = hammer_create_at_cursor(
                                        cursor, &mrec->leaf,
                                        uptr, HAMMER_CREATE_MODE_UMIRROR);
                } else {
                        error = 0;
                }
        }
        if (error == 0 || error == EALREADY)
                mirror->key_cur = mrec->leaf.base;
        return(error);
}
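/*
 * Worked example of the sanity check at the top of write_rec.  The
 * numbers are illustrative (sizeof(*mrec) depends on the mrecord
 * layout): with data_len = 4096 and sizeof(*mrec) = 64, head.rec_size
 * must be at least 4096 + 64 = 4160 or the stream record is rejected
 * with EINVAL before any B-Tree work is done.  data_len is also capped
 * at HAMMER_XBUFSIZE, the largest data payload HAMMER deals with.
 */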
/*
 * Set the PFS hammer cleanup utility config record.  This is
 * different from (and newer than) the PFS config.
 *
 * This is kind of a hack.
 */
static int
hammer_ioc_set_config(hammer_transaction_t trans, hammer_inode_t ip,
                      struct hammer_ioc_config *config)
{
        struct hammer_btree_leaf_elm leaf;
        struct hammer_cursor cursor;
        hammer_mount_t hmp = ip->hmp;
        int error;

again:
        error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
        if (error) {
                hammer_done_cursor(&cursor);
                return(error);
        }

        bzero(&leaf, sizeof(leaf));
        leaf.base.obj_id = HAMMER_OBJID_ROOT;
        leaf.base.rec_type = HAMMER_RECTYPE_CONFIG;
        leaf.base.create_tid = hammer_alloc_tid(hmp, 1);
        leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
        leaf.base.localization = ip->obj_localization |
                                 HAMMER_LOCALIZE_INODE;
        leaf.base.key = 0;      /* page 0 */
        leaf.data_len = sizeof(struct hammer_config_data);

        cursor.key_beg = leaf.base;

        cursor.asof = HAMMER_MAX_TID;
        cursor.flags |= HAMMER_CURSOR_BACKEND | HAMMER_CURSOR_ASOF;

        error = hammer_btree_lookup(&cursor);
        if (error == 0) {
                error = hammer_btree_extract_data(&cursor);
                error = hammer_delete_at_cursor(&cursor,
                                                HAMMER_DELETE_DESTROY,
                                                0, 0, 0, NULL);
                if (error == EDEADLK) {
                        hammer_done_cursor(&cursor);
                        goto again;
                }
        }
        if (error == ENOENT)
                error = 0;
        if (error == 0) {
                /*
                 * NOTE: Must reload key_beg after an ASOF search because
                 *       the create_tid may have been modified during the
                 *       search.
                 */
                cursor.flags &= ~HAMMER_CURSOR_ASOF;
                cursor.key_beg = leaf.base;
                error = hammer_create_at_cursor(&cursor, &leaf,
                                                &config->config,
                                                HAMMER_CREATE_MODE_SYS);
                if (error == EDEADLK) {
                        hammer_done_cursor(&cursor);
                        goto again;
                }
        }
        config->head.error = error;
        hammer_done_cursor(&cursor);
        return(0);
}
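/*
 * Example (sketch): replacing the config record from userland.  Hedged
 * illustration; it assumes HAMMERIOC_SET_CONFIG is the ioctl bound to
 * this handler and that struct hammer_config_data carries the config
 * text in a text[] field (an assumption about the layout).
 *
 *      struct hammer_ioc_config config;
 *
 *      bzero(&config, sizeof(config));
 *      snprintf(config.config.text, sizeof(config.config.text),
 *               "snapshots 1d 60d\n");
 *      if (ioctl(fd, HAMMERIOC_SET_CONFIG, &config) < 0)
 *              err(1, "HAMMERIOC_SET_CONFIG");
 *      if (config.head.error)
 *              errx(1, "set-config: %d", config.head.error);
 */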
/*
 * Delete snapshot transaction id(s) from the list of snapshots.
 */
static int
hammer_ioc_del_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
                        struct hammer_ioc_snapshot *snap)
{
        hammer_mount_t hmp = ip->hmp;
        struct hammer_cursor cursor;
        int error;

        /*
         * Validate structure
         */
        if (snap->count > HAMMER_SNAPS_PER_IOCTL)
                return (EINVAL);
        if (snap->index >= snap->count)
                return (EINVAL);

        hammer_lock_ex(&hmp->snapshot_lock);
again:
        /*
         * Look for keys starting after the previous iteration, or at
         * the beginning if snap->index is 0.
         */
        error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
        if (error) {
                hammer_done_cursor(&cursor);
                hammer_unlock(&hmp->snapshot_lock);
                return(error);
        }

        cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
        cursor.key_beg.create_tid = 0;
        cursor.key_beg.delete_tid = 0;
        cursor.key_beg.obj_type = 0;
        cursor.key_beg.rec_type = HAMMER_RECTYPE_SNAPSHOT;
        cursor.key_beg.localization = ip->obj_localization |
                                      HAMMER_LOCALIZE_INODE;
        cursor.asof = HAMMER_MAX_TID;
        cursor.flags |= HAMMER_CURSOR_ASOF;

        while (snap->index < snap->count) {
                cursor.key_beg.key = (int64_t)snap->snaps[snap->index].tid;
                error = hammer_btree_lookup(&cursor);
                if (error)
                        break;
                error = hammer_btree_extract_leaf(&cursor);
                if (error)
                        break;
                error = hammer_delete_at_cursor(&cursor,
                                                HAMMER_DELETE_DESTROY,
                                                0, 0, 0, NULL);
                if (error == EDEADLK) {
                        hammer_done_cursor(&cursor);
                        goto again;
                }
                if (error)
                        break;
                ++snap->index;
        }
        snap->head.error = error;
        hammer_done_cursor(&cursor);
        hammer_unlock(&hmp->snapshot_lock);
        return(0);
}
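/*
 * Example (sketch): deleting two snapshot TIDs from userland.  Hedged
 * illustration; it assumes HAMMERIOC_DEL_SNAPSHOT is the ioctl bound
 * to this handler.  index is advanced by the kernel as records are
 * deleted, which lets the caller see how far a partial run got.
 *
 *      struct hammer_ioc_snapshot snap;
 *
 *      bzero(&snap, sizeof(snap));
 *      snap.count = 2;
 *      snap.snaps[0].tid = tid0;
 *      snap.snaps[1].tid = tid1;
 *      if (ioctl(fd, HAMMERIOC_DEL_SNAPSHOT, &snap) < 0)
 *              err(1, "HAMMERIOC_DEL_SNAPSHOT");
 *      if (snap.head.error)
 *              errx(1, "del-snapshot: %d", snap.head.error);
 */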
/*
 * Add snapshot transaction id(s) to the list of snapshots.
 *
 * NOTE: Records are created with an allocated TID.  If a flush cycle
 *       is in progress the record may be synced in the current flush
 *       cycle and the volume header will reflect the allocation of the
 *       TID, but the synchronization point may not catch up to the
 *       TID until the next flush cycle.
 */
static int
hammer_ioc_add_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
                        struct hammer_ioc_snapshot *snap)
{
        hammer_mount_t hmp = ip->hmp;
        struct hammer_btree_leaf_elm leaf;
        struct hammer_cursor cursor;
        int error;

        /*
         * Validate structure
         */
        if (snap->count > HAMMER_SNAPS_PER_IOCTL)
                return (EINVAL);
        if (snap->index >= snap->count)
                return (EINVAL);

        hammer_lock_ex(&hmp->snapshot_lock);
again:
        /*
         * Look for keys starting after the previous iteration, or at
         * the beginning if snap->index is 0.
         */
        error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
        if (error) {
                hammer_done_cursor(&cursor);
                hammer_unlock(&hmp->snapshot_lock);
                return(error);
        }

        cursor.asof = HAMMER_MAX_TID;
        cursor.flags |= HAMMER_CURSOR_BACKEND | HAMMER_CURSOR_ASOF;

        bzero(&leaf, sizeof(leaf));
        leaf.base.obj_id = HAMMER_OBJID_ROOT;
        leaf.base.rec_type = HAMMER_RECTYPE_SNAPSHOT;
        leaf.base.create_tid = hammer_alloc_tid(hmp, 1);
        leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
        leaf.base.localization = ip->obj_localization |
                                 HAMMER_LOCALIZE_INODE;
        leaf.data_len = sizeof(struct hammer_snapshot_data);

        while (snap->index < snap->count) {
                leaf.base.key = (int64_t)snap->snaps[snap->index].tid;
                cursor.key_beg = leaf.base;
                error = hammer_btree_lookup(&cursor);
                if (error == 0) {
                        error = EEXIST;
                        break;
                }

                /*
                 * NOTE: Must reload key_beg after an ASOF search because
                 *       the create_tid may have been modified during the
                 *       search.
                 */
                cursor.flags &= ~HAMMER_CURSOR_ASOF;
                cursor.key_beg = leaf.base;
                error = hammer_create_at_cursor(&cursor, &leaf,
                                                &snap->snaps[snap->index],
                                                HAMMER_CREATE_MODE_SYS);
                if (error == EDEADLK) {
                        hammer_done_cursor(&cursor);
                        goto again;
                }
                cursor.flags |= HAMMER_CURSOR_ASOF;
                if (error)
                        break;
                ++snap->index;
        }
        snap->head.error = error;
        hammer_done_cursor(&cursor);
        hammer_unlock(&hmp->snapshot_lock);
        return(0);
}
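/*
 * Example (sketch): registering a snapshot TID from userland, the
 * mirror image of the deletion example above.  Hedged illustration; it
 * assumes HAMMERIOC_ADD_SNAPSHOT is the ioctl bound to this handler
 * and that snapshot data carries a label[] field (an assumption about
 * the layout).  A duplicate TID surfaces as EEXIST in snap.head.error.
 *
 *      bzero(&snap, sizeof(snap));
 *      snap.count = 1;
 *      snap.snaps[0].tid = tid;
 *      snprintf(snap.snaps[0].label, sizeof(snap.snaps[0].label),
 *               "nightly");
 *      if (ioctl(fd, HAMMERIOC_ADD_SNAPSHOT, &snap) < 0)
 *              err(1, "HAMMERIOC_ADD_SNAPSHOT");
 */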
int
hammer_ioc_dedup(hammer_transaction_t trans, hammer_inode_t ip,
                 struct hammer_ioc_dedup *dedup)
{
        struct hammer_cursor cursor1, cursor2;
        int error;
        int seq;

        /*
         * Enforce hammer filesystem version requirements
         */
        if (trans->hmp->version < HAMMER_VOL_VERSION_FIVE) {
                kprintf("hammer: Filesystem must be upgraded to v5 "
                        "before you can run dedup\n");
                return (EOPNOTSUPP);    /* 95 */
        }

        /*
         * Cursor1, return an error -> candidate goes to pass2 list
         */
        error = hammer_init_cursor(trans, &cursor1, NULL, NULL);
        if (error)
                goto done_cursor;
        cursor1.key_beg = dedup->elm1;
        cursor1.flags |= HAMMER_CURSOR_BACKEND;

        error = hammer_btree_lookup(&cursor1);
        if (error)
                goto done_cursor;
        error = hammer_btree_extract(&cursor1, HAMMER_CURSOR_GET_LEAF |
                                               HAMMER_CURSOR_GET_DATA);
        if (error)
                goto done_cursor;

        /*
         * Cursor2, return an error -> candidate goes to pass2 list
         */
        error = hammer_init_cursor(trans, &cursor2, NULL, NULL);
        if (error)
                goto done_cursors;
        cursor2.key_beg = dedup->elm2;
        cursor2.flags |= HAMMER_CURSOR_BACKEND;

        error = hammer_btree_lookup(&cursor2);
        if (error)
                goto done_cursors;
        error = hammer_btree_extract(&cursor2, HAMMER_CURSOR_GET_LEAF |
                                               HAMMER_CURSOR_GET_DATA);
        if (error)
                goto done_cursors;

        /*
         * Zone validation.  We can't de-dup any of the other zones
         * (BTREE or META) or bad things will happen.
         *
         * Return with error = 0, but set an INVALID_ZONE flag.
         */
        error = validate_zone(cursor1.leaf->data_offset) +
                validate_zone(cursor2.leaf->data_offset);
        if (error) {
                dedup->head.flags |= HAMMER_IOC_DEDUP_INVALID_ZONE;
                error = 0;
                goto done_cursors;
        }

        /*
         * Comparison checks
         *
         * If zones don't match or data_len fields aren't the same
         * we consider it to be a comparison failure.
         *
         * Return with error = 0, but set a CMP_FAILURE flag.
         */
        if ((cursor1.leaf->data_offset & HAMMER_OFF_ZONE_MASK) !=
            (cursor2.leaf->data_offset & HAMMER_OFF_ZONE_MASK)) {
                dedup->head.flags |= HAMMER_IOC_DEDUP_CMP_FAILURE;
                goto done_cursors;
        }
        if (cursor1.leaf->data_len != cursor2.leaf->data_len) {
                dedup->head.flags |= HAMMER_IOC_DEDUP_CMP_FAILURE;
                goto done_cursors;
        }

        /* byte-by-byte comparison to be sure */
        if (bcmp(cursor1.data, cursor2.data, cursor1.leaf->data_len)) {
                dedup->head.flags |= HAMMER_IOC_DEDUP_CMP_FAILURE;
                goto done_cursors;
        }

        /*
         * Upgrade both cursors together to an exclusive lock
         *
         * Return an error -> candidate goes to pass2 list
         */
        hammer_sync_lock_sh(trans);
        error = hammer_cursor_upgrade2(&cursor1, &cursor2);
        if (error) {
                hammer_sync_unlock(trans);
                goto done_cursors;
        }

        error = hammer_blockmap_dedup(cursor1.trans,
                        cursor1.leaf->data_offset, cursor1.leaf->data_len);
        if (error) {
                if (error == ERANGE) {
                        /*
                         * Return with error = 0, but set an UNDERFLOW flag
                         */
                        dedup->head.flags |= HAMMER_IOC_DEDUP_UNDERFLOW;
                        error = 0;
                        goto downgrade_cursors;
                } else {
                        /*
                         * Return an error -> block goes to pass2 list
                         */
                        goto downgrade_cursors;
                }
        }

        /*
         * The cursor2's cache must be invalidated before calling
         * hammer_blockmap_free(), otherwise it will not be able to
         * invalidate the underlying data buffer.
         */
        hammer_cursor_invalidate_cache(&cursor2);
        hammer_blockmap_free(cursor2.trans,
                        cursor2.leaf->data_offset, cursor2.leaf->data_len);

        hammer_modify_node(cursor2.trans, cursor2.node,
                        &cursor2.leaf->data_offset, sizeof(hammer_off_t));
        cursor2.leaf->data_offset = cursor1.leaf->data_offset;
        hammer_modify_node_done(cursor2.node);

downgrade_cursors:
        hammer_cursor_downgrade2(&cursor1, &cursor2);
        hammer_sync_unlock(trans);
done_cursors:
        hammer_done_cursor(&cursor2);
done_cursor:
        hammer_done_cursor(&cursor1);

        /*
         * Avoid deadlocking the buffer cache
         */
        seq = trans->hmp->flusher.done;
        while (hammer_flusher_meta_halflimit(trans->hmp) ||
               hammer_flusher_undo_exhausted(trans, 2)) {
                hammer_flusher_wait(trans->hmp, seq);
                seq = hammer_flusher_async_one(trans->hmp);
        }
        return (error);
}
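/*
 * Example (sketch): how a dedup pass might drive this ioctl from
 * userland.  Hedged illustration, not the actual hammer(8) dedup code;
 * it assumes HAMMERIOC_DEDUP is the ioctl bound to this handler and
 * that elm1/elm2 identify two B-Tree leaf elements whose payloads are
 * believed identical (e.g. because their data CRCs matched).
 *
 *      struct hammer_ioc_dedup dedup;
 *
 *      bzero(&dedup, sizeof(dedup));
 *      dedup.elm1 = leaf1->base;       // candidate kept
 *      dedup.elm2 = leaf2->base;       // candidate re-pointed to elm1
 *      if (ioctl(fd, HAMMERIOC_DEDUP, &dedup) < 0)
 *              err(1, "HAMMERIOC_DEDUP");
 *      if (dedup.head.flags & HAMMER_IOC_DEDUP_CMP_FAILURE)
 *              printf("payloads differ, skip\n");
 *      else if (dedup.head.flags & HAMMER_IOC_DEDUP_UNDERFLOW)
 *              printf("ref count underflow, requeue\n");
 */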