Example #1
0
/*
 * Helper function - roll back the B-Tree leaf element under the cursor
 * relative to trunc_tid.
 *
 *  - create_tid >= trunc_tid: the record is physically destroyed.
 *  - otherwise, delete_tid >= trunc_tid: delete_tid is set to 0,
 *    undeleting the record.
 *  - otherwise the record is left untouched.
 *
 * Returns 0 if nothing had to be done, else the result of
 * hammer_delete_at_cursor().
 */
static
int
hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid)
{
	hammer_btree_leaf_elm_t leaf;

	leaf = &cursor->node->ondisk->elms[cursor->index].leaf;

	/* Created at or after the truncation point: destroy outright. */
	if (leaf->base.create_tid >= trunc_tid) {
		return(hammer_delete_at_cursor(cursor, HAMMER_DELETE_DESTROY,
					       cursor->trans->tid,
					       cursor->trans->time32,
					       1, NULL));
	}

	/* Deleted at or after the truncation point: undelete. */
	if (leaf->base.delete_tid >= trunc_tid) {
		return(hammer_delete_at_cursor(cursor, HAMMER_DELETE_ADJUST,
					       0, 0,
					       1, NULL));
	}

	/* Both TIDs predate trunc_tid; the record is unaffected. */
	return(0);
}
Example #2
0
/*
 * As part of the mirror write we iterate across swaths of records
 * on the target which no longer exist on the source, and mark them
 * deleted.
 *
 * The caller has indexed the cursor and set up key_end.  We iterate
 * through to key_end.
 *
 * There is an edge case where the master has deleted a record whose
 * create_tid exactly matches our end_tid.  We cannot delete this
 * record on the slave yet because we cannot assign delete_tid == create_tid.
 * The deletion should be picked up on the next sequence since in order
 * to have been deleted on the master a transaction must have occurred with
 * a TID greater than the create_tid of the record.
 *
 * To support incremental re-mirroring, just for robustness, we do not
 * touch any records created beyond (or equal to) mirror->tid_end.
 *
 * Returns 0 on success (running off the end of the range, ENOENT, is
 * not an error) or the first error from the iteration or deletion.
 */
static
int
hammer_mirror_delete_to(hammer_cursor_t cursor,
		       struct hammer_ioc_mirror_rw *mirror)
{
	hammer_btree_leaf_elm_t leaf;
	int error;

	for (error = hammer_btree_iterate(cursor);
	     error == 0;
	     error = hammer_btree_iterate(cursor)) {
		leaf = &cursor->node->ondisk->elms[cursor->index].leaf;
		KKASSERT(leaf->base.btype == HAMMER_BTREE_TYPE_RECORD);
		cursor->flags |= HAMMER_CURSOR_ATEDISK;

		/*
		 * Certain records are not part of the mirroring operation
		 */
		if (hammer_mirror_nomirror(&leaf->base))
			continue;

		/*
		 * Skip records that are already deleted and records
		 * created at or beyond tid_end.
		 *
		 * Note: Must still delete records with create_tid < tid_beg,
		 *	 as record may have been pruned-away on source.
		 */
		if (leaf->base.delete_tid != 0 ||
		    leaf->base.create_tid >= mirror->tid_end) {
			continue;
		}

		error = hammer_delete_at_cursor(cursor,
						HAMMER_DELETE_ADJUST,
						mirror->tid_end,
						time_second,
						1, NULL);
		if (error)
			break;
	}
	return((error == ENOENT) ? 0 : error);
}
Example #3
0
/*
 * Update a record in-place.  Only the delete_tid can change, and
 * only from zero to non-zero.
 *
 * Returns 0 without touching the cursor if the mirror record carries no
 * delete_tid, otherwise the result of hammer_delete_at_cursor().
 */
static
int
hammer_mirror_update(hammer_cursor_t cursor,
		     struct hammer_ioc_mrecord_rec *mrec)
{
	int error = 0;

	/*
	 * A zero delete_tid shouldn't occur; there is nothing to do for it.
	 * Otherwise mark the record deleted on the mirror target, using the
	 * delete TID and timestamp supplied in the mirror record.
	 */
	if (mrec->leaf.base.delete_tid != 0) {
		error = hammer_delete_at_cursor(cursor, HAMMER_DELETE_ADJUST,
						mrec->leaf.base.delete_tid,
						mrec->leaf.delete_ts,
						1, NULL);
		cursor->flags |= HAMMER_CURSOR_ATEDISK;
	}
	return(error);
}
Example #4
0
/*
 * Store the PFS hammer cleanup utility config record.  This is
 * different from (newer than) the PFS config.
 *
 * Any existing config record is looked up and destroyed, then a new
 * record carrying config->config is created.  EDEADLK from either the
 * destroy or the create restarts the whole operation with a fresh cursor.
 *
 * Returns the hammer_init_cursor() error if the cursor cannot be set up;
 * otherwise returns 0 and reports the operation's status in
 * config->head.error.
 *
 * This is kinda a hack.
 */
static
int
hammer_ioc_set_config(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_config *config)
{
	struct hammer_btree_leaf_elm rec;
	struct hammer_cursor cursor;
	hammer_mount_t hmp = ip->hmp;
	int error;

	for (;;) {
		error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
		if (error) {
			hammer_done_cursor(&cursor);
			return(error);
		}

		/*
		 * Build the leaf describing the new config record; it also
		 * serves as the search key for the record being replaced.
		 */
		bzero(&rec, sizeof(rec));
		rec.base.obj_id = HAMMER_OBJID_ROOT;
		rec.base.rec_type = HAMMER_RECTYPE_CONFIG;
		rec.base.create_tid = hammer_alloc_tid(hmp, 1);
		rec.base.btype = HAMMER_BTREE_TYPE_RECORD;
		rec.base.localization = ip->obj_localization |
					HAMMER_LOCALIZE_INODE;
		rec.base.key = 0;	/* page 0 */
		rec.data_len = sizeof(struct hammer_config_data);

		cursor.key_beg = rec.base;
		cursor.asof = HAMMER_MAX_TID;
		cursor.flags |= HAMMER_CURSOR_BACKEND | HAMMER_CURSOR_ASOF;

		error = hammer_btree_lookup(&cursor);
		if (error == 0) {
			/*
			 * The extraction result is not acted upon; only the
			 * outcome of the destroy matters from here on.
			 * NOTE(review): confirm that ignoring an extraction
			 * failure is intentional.
			 */
			(void)hammer_btree_extract_data(&cursor);
			error = hammer_delete_at_cursor(&cursor,
							HAMMER_DELETE_DESTROY,
							0, 0, 0, NULL);
			if (error == EDEADLK) {
				hammer_done_cursor(&cursor);
				continue;
			}
		}

		/* No pre-existing record is fine, we just create one. */
		if (error == ENOENT)
			error = 0;

		if (error == 0) {
			/*
			 * NOTE: Must reload key_beg after an ASOF search
			 *	 because the create_tid may have been
			 *	 modified during the search.
			 */
			cursor.flags &= ~HAMMER_CURSOR_ASOF;
			cursor.key_beg = rec.base;
			error = hammer_create_at_cursor(&cursor, &rec,
							&config->config,
							HAMMER_CREATE_MODE_SYS);
			if (error == EDEADLK) {
				hammer_done_cursor(&cursor);
				continue;
			}
		}
		break;
	}

	config->head.error = error;
	hammer_done_cursor(&cursor);
	return(0);
}
Example #5
0
/*
 * Delete snapshot transaction id(s) from the list of snapshots.
 *
 * Entries snap->snaps[snap->index] through snap->snaps[snap->count - 1]
 * are processed in order.  snap->index is advanced past each entry whose
 * record was destroyed, so when the loop stops on an error it identifies
 * the entry that failed.
 *
 * Returns EINVAL if the request fails validation, or the
 * hammer_init_cursor() error if the cursor cannot be set up.  Otherwise
 * returns 0 with the operation's status in snap->head.error.
 *
 * hmp->snapshot_lock is held exclusively while records are being deleted
 * and is released on every exit path taken after it has been acquired.
 */
static
int
hammer_ioc_del_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_snapshot *snap)
{
	hammer_mount_t hmp = ip->hmp;
	struct hammer_cursor cursor;
	int error;

	/*
	 * Validate structure
	 */
	if (snap->count > HAMMER_SNAPS_PER_IOCTL)
		return (EINVAL);
	if (snap->index >= snap->count)
		return (EINVAL);

	hammer_lock_ex(&hmp->snapshot_lock);
again:
	/*
	 * (Re)initialize the cursor.  We come back here with a fresh
	 * cursor after EDEADLK; snap->index is preserved so entries that
	 * were already deleted are not revisited.
	 */
	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		/* Do not leak the snapshot lock on the early-out path. */
		hammer_unlock(&hmp->snapshot_lock);
		return(error);
	}

	cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_SNAPSHOT;
	cursor.key_beg.localization = ip->obj_localization | HAMMER_LOCALIZE_INODE;
	cursor.asof = HAMMER_MAX_TID;
	cursor.flags |= HAMMER_CURSOR_ASOF;

	while (snap->index < snap->count) {
		/* Snapshot records are keyed by their transaction id. */
		cursor.key_beg.key = (int64_t)snap->snaps[snap->index].tid;
		error = hammer_btree_lookup(&cursor);
		if (error)
			break;
		error = hammer_btree_extract_leaf(&cursor);
		if (error)
			break;
		error = hammer_delete_at_cursor(&cursor, HAMMER_DELETE_DESTROY,
						0, 0, 0, NULL);
		if (error == EDEADLK) {
			hammer_done_cursor(&cursor);
			goto again;
		}
		if (error)
			break;
		++snap->index;
	}
	snap->head.error = error;
	hammer_done_cursor(&cursor);
	hammer_unlock(&hmp->snapshot_lock);
	return(0);
}
Example #6
0
/*
 * Prune ioctl: scan the B-Tree backwards over the requested key range and
 * physically destroy every record for which prune_should_delete() returns
 * true.
 *
 * prune->key_cur tracks the scan position.  It starts at key_end, is
 * updated to each element visited, and is used as the upper bound when the
 * scan is restarted after EDEADLK, so a retry resumes where the previous
 * pass left off.
 *
 * The user-supplied element array is copied into kernel memory for the
 * duration of the call; prune->elms is pointed back at the user array
 * and the copy is freed before returning.
 *
 * Returns EINVAL for a malformed request, the copyin() or
 * hammer_init_cursor() error if setup fails, or the error that stopped
 * the scan.  Running off the start of the range (ENOENT) is success, and
 * an interruption (EINTR) is reported by setting HAMMER_IOC_HEAD_INTR in
 * prune->head.flags and returning 0.
 */
int
hammer_ioc_prune(hammer_transaction_t trans, hammer_inode_t ip,
		 struct hammer_ioc_prune *prune)
{
	struct hammer_cursor cursor;
	hammer_btree_leaf_elm_t elm;
	struct hammer_ioc_prune_elm *copy_elms;
	struct hammer_ioc_prune_elm *user_elms;
	int error;
	int isdir;
	int elm_array_size;
	int seq;

	/*
	 * Validate the request: element count in range, no PFS bits in the
	 * supplied localizations, key_beg not past key_end, and no element
	 * list when pruning everything.
	 */
	if (prune->nelms < 0 || prune->nelms > HAMMER_MAX_PRUNE_ELMS)
		return(EINVAL);
	if ((prune->key_beg.localization | prune->key_end.localization) &
	    HAMMER_LOCALIZE_PSEUDOFS_MASK) {
		return(EINVAL);
	}
	if (prune->key_beg.localization > prune->key_end.localization)
		return(EINVAL);
	if (prune->key_beg.localization == prune->key_end.localization) {
		if (prune->key_beg.obj_id > prune->key_end.obj_id)
			return(EINVAL);
		/* key-space limitations - no check needed */
	}
	if ((prune->head.flags & HAMMER_IOC_PRUNE_ALL) && prune->nelms)
		return(EINVAL);
/* 22 EINVAL */

	/*
	 * Start the scan position at the end of the range, with the
	 * inode's localization folded in.
	 */
	prune->key_cur.localization = (prune->key_end.localization &
					HAMMER_LOCALIZE_MASK) +
				      ip->obj_localization;
	prune->key_cur.obj_id = prune->key_end.obj_id;
	prune->key_cur.key = HAMMER_MAX_KEY;

	/*
	 * Copy element array from userland
	 */
	elm_array_size = sizeof(*copy_elms) * prune->nelms;
	user_elms = prune->elms;
	copy_elms = kmalloc(elm_array_size, M_TEMP, M_WAITOK);
	if ((error = copyin(user_elms, copy_elms, elm_array_size)) != 0)
		goto failed;
	prune->elms = copy_elms;

	/* Initial sequence number for the hammer_flusher_wait() below. */
	seq = trans->hmp->flusher.done;

	/*
	 * Scan backwards.  Retries typically occur if a deadlock is detected.
	 */
retry:
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		goto failed;
	}
	cursor.key_beg.localization = (prune->key_beg.localization &
					HAMMER_LOCALIZE_MASK) +
				      ip->obj_localization;
	cursor.key_beg.obj_id = prune->key_beg.obj_id;
	cursor.key_beg.key = HAMMER_MIN_KEY;
	cursor.key_beg.create_tid = 1;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
	cursor.key_beg.obj_type = 0;

	/*
	 * The upper bound comes from key_cur rather than key_end so that
	 * a retry resumes from the last element visited.
	 */
	cursor.key_end.localization = prune->key_cur.localization;
	cursor.key_end.obj_id = prune->key_cur.obj_id;
	cursor.key_end.key = prune->key_cur.key;
	cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
	cursor.key_end.delete_tid = 0;
	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
	cursor.key_end.obj_type = 0;

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
	cursor.flags |= HAMMER_CURSOR_BACKEND;

	/*
	 * This flag allows the B-Tree code to clean up loose ends.  At
	 * the moment (XXX) it also means we have to hold the sync lock
	 * through the iteration.
	 */
	cursor.flags |= HAMMER_CURSOR_PRUNING;

	hammer_sync_lock_sh(trans);
	error = hammer_btree_last(&cursor);
	hammer_sync_unlock(trans);

	while (error == 0) {
		/*
		 * Check for work
		 */
		elm = &cursor.node->ondisk->elms[cursor.index].leaf;
		prune->key_cur = elm->base;

		/*
		 * Yield to more important tasks
		 */
		if ((error = hammer_signal_check(trans->hmp)) != 0)
			break;

		/* Track the smallest create_tid seen during the scan. */
		if (prune->stat_oldest_tid > elm->base.create_tid)
			prune->stat_oldest_tid = elm->base.create_tid;

		if (hammer_debug_general & 0x0200) {
			kprintf("check %016llx %016llx cre=%016llx del=%016llx\n",
					(long long)elm->base.obj_id,
					(long long)elm->base.key,
					(long long)elm->base.create_tid,
					(long long)elm->base.delete_tid);
		}
				
		if (prune_should_delete(prune, elm)) {
			if (hammer_debug_general & 0x0200) {
				kprintf("check %016llx %016llx: DELETE\n",
					(long long)elm->base.obj_id,
					(long long)elm->base.key);
			}

			/*
			 * NOTE: This can return EDEADLK
			 *
			 * Acquiring the sync lock guarantees that the
			 * operation will not cross a synchronization
			 * boundary (see the flusher).
			 *
			 * We dont need to track inodes or next_tid when
			 * we are destroying deleted records.
			 */
			/* Sample the record type before the element is destroyed. */
			isdir = (elm->base.rec_type == HAMMER_RECTYPE_DIRENTRY);

			hammer_sync_lock_sh(trans);
			error = hammer_delete_at_cursor(&cursor,
							HAMMER_DELETE_DESTROY,
							cursor.trans->tid,
							cursor.trans->time32,
							0, &prune->stat_bytes);
			hammer_sync_unlock(trans);
			if (error)
				break;

			if (isdir)
				++prune->stat_dirrecords;
			else
				++prune->stat_rawrecords;

			/*
			 * The current record might now be the one after
			 * the one we deleted, set ATEDISK to force us
			 * to skip it (since we are iterating backwards).
			 */
			cursor.flags |= HAMMER_CURSOR_ATEDISK;
		} else {
			/*
			 * Nothing to delete, but we may have to check other
			 * things.
			 */
			prune_check_nlinks(&cursor, elm);
			cursor.flags |= HAMMER_CURSOR_ATEDISK;
			if (hammer_debug_general & 0x0100) {
				kprintf("check %016llx %016llx: SKIP\n",
					(long long)elm->base.obj_id,
					(long long)elm->base.key);
			}
		}
		++prune->stat_scanrecords;

		/*
		 * Throttle against the flusher: while either limit check
		 * reports pressure, drop the cursor lock, wait on the
		 * flusher, then relock and request another flush.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		while (hammer_flusher_meta_halflimit(trans->hmp) ||
		       hammer_flusher_undo_exhausted(trans, 2)) {
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async_one(trans->hmp);
		}
		hammer_sync_lock_sh(trans);
		error = hammer_btree_iterate_reverse(&cursor);
		hammer_sync_unlock(trans);
	}
	/* Running off the start of the range is the normal way to finish. */
	if (error == ENOENT)
		error = 0;
	hammer_done_cursor(&cursor);
	if (error == EDEADLK)
		goto retry;
	/* An interrupted scan is reported via the header flag, not errno. */
	if (error == EINTR) {
		prune->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}
failed:
	/*
	 * Mask key_cur's localization with HAMMER_LOCALIZE_MASK, point
	 * prune->elms back at the user's array and release the kernel copy.
	 */
	prune->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	prune->elms = user_elms;
	kfree(copy_elms, M_TEMP);
	return(error);
}