Example No. 1
static
int
hammer_ioc_get_data(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_data *data)
{
	struct hammer_cursor cursor;
	int bytes;
	int error;

	/* XXX cached inode ? */
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error)
		goto failed;

	cursor.key_beg = data->elm;
	cursor.flags |= HAMMER_CURSOR_BACKEND;

	error = hammer_btree_lookup(&cursor);
	if (error == 0) {
		error = hammer_btree_extract_data(&cursor);
		if (error == 0) {
			data->leaf = *cursor.leaf;
			bytes = cursor.leaf->data_len;
			if (bytes > data->size)
				bytes = data->size;
			error = copyout(cursor.data, data->ubuf, bytes);
		}
	}

failed:
	hammer_done_cursor(&cursor);
	return (error);
}
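A minimal userland sketch of driving the handler above. The field layout (elm, ubuf, size, leaf) is taken from the handler itself; the ioctl request name HAMMERIOC_GET_DATA and the header path are assumptions.

/*
 * Hedged sketch: fetch one B-Tree record's data payload.
 */
#include <strings.h>
#include <sys/ioctl.h>
#include <vfs/hammer/hammer_ioctl.h>	/* path assumed */

static int
get_record_data(int fd, struct hammer_base_elm *elm, void *buf, int bufsize)
{
	struct hammer_ioc_data data;

	bzero(&data, sizeof(data));
	data.elm = *elm;		/* key of the record to look up */
	data.ubuf = buf;		/* receives at most data.size bytes */
	data.size = bufsize;
	if (ioctl(fd, HAMMERIOC_GET_DATA, &data) < 0)
		return (-1);
	return (data.leaf.data_len);	/* full on-media payload length */
}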
Example No. 2
/*
 * Iterate PFS ondisk data.
 * This function essentially does the same as hammer_load_pseudofs()
 * except that this function only retrieves PFS data without touching
 * hammer_pfs_rb_tree at all.
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
 *
 * NOTE: The API was changed in DragonFly 4.7 due to design issues in
 * this ioctl and in libhammer (the only caller of this ioctl within
 * the DragonFly source tree, and no longer maintained by anyone).
 */
int
hammer_ioc_scan_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_pseudofs_rw *pfs)
{
	struct hammer_cursor cursor;
	hammer_inode_t dip;
	uint32_t localization;
	int error;

	if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
		return(error);
	localization = pfs_to_lo(pfs->pfs_id);
	pfs->bytes = sizeof(struct hammer_pseudofs_data);
	pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;

	dip = hammer_get_inode(trans, NULL, HAMMER_OBJID_ROOT, HAMMER_MAX_TID,
		HAMMER_DEF_LOCALIZATION, 0, &error);

	error = hammer_init_cursor(trans, &cursor,
		(dip ? &dip->cache[1] : NULL), dip);
	if (error)
		goto fail;

	cursor.key_beg.localization = HAMMER_DEF_LOCALIZATION |
				      HAMMER_LOCALIZE_MISC;
	cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_PFS;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.key = localization;
	cursor.asof = HAMMER_MAX_TID;
	cursor.flags |= HAMMER_CURSOR_ASOF;

	error = hammer_ip_lookup(&cursor);
	if (error == 0) {
		error = hammer_ip_resolve_data(&cursor);
		if (error == 0) {
			if (pfs->ondisk)
				error = copyout(cursor.data, pfs->ondisk,
						cursor.leaf->data_len);
			localization = cursor.leaf->base.key;
			pfs->pfs_id = lo_to_pfs(localization);
		}
	}
	hammer_done_cursor(&cursor);
fail:
	if (dip)
		hammer_rel_inode(dip, 0);
	return(error);
}
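For reference, the pfs_to_lo()/lo_to_pfs() helpers used above convert between a PFS id and the localization field. Judging from the "pfs_id << 16" idiom in the mirroring examples further down, they amount to roughly the following sketch (not the canonical definitions):

/*
 * Rough sketch only: the PFS id occupies the upper 16 bits of the
 * 32-bit localization value.
 */
static __inline uint32_t
sketch_pfs_to_lo(uint32_t pfs_id)
{
	return (pfs_id << 16);
}

static __inline uint32_t
sketch_lo_to_pfs(uint32_t localization)
{
	return (localization >> 16);
}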
Example No. 3
/*
 * Retrieve the PFS hammer cleanup utility config record.  This is
 * distinct from (and newer than) the PFS config.
 */
static
int
hammer_ioc_get_config(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_config *config)
{
	struct hammer_cursor cursor;
	int error;

	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		return(error);
	}

	cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_CONFIG;
	cursor.key_beg.localization = ip->obj_localization | HAMMER_LOCALIZE_INODE;
	cursor.key_beg.key = 0;		/* config space page 0 */

	cursor.asof = HAMMER_MAX_TID;
	cursor.flags |= HAMMER_CURSOR_ASOF;

	error = hammer_btree_lookup(&cursor);
	if (error == 0) {
		error = hammer_btree_extract_data(&cursor);
		if (error == 0)
			config->config = cursor.data->config;
	}
	/* error can be ENOENT */
	config->head.error = error;
	hammer_done_cursor(&cursor);
	return(0);
}
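A hedged userland sketch of calling this handler. The head.error convention (ENOENT when no config record exists) comes from the code above; the request name HAMMERIOC_GET_CONFIG and the header path are assumptions.

#include <strings.h>
#include <sys/ioctl.h>
#include <vfs/hammer/hammer_ioctl.h>	/* path assumed */

static int
get_cleanup_config(int fd, struct hammer_ioc_config *config)
{
	bzero(config, sizeof(*config));
	if (ioctl(fd, HAMMERIOC_GET_CONFIG, config) < 0)
		return (-1);			/* ioctl plumbing failed */
	return (config->head.error);		/* 0, or ENOENT if absent */
}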
Example No. 4
/*
 * Rollback the specified PFS to (trunc_tid - 1), removing everything
 * greater than or equal to trunc_tid.  The PFS must not have been in
 * no-mirror mode or the MIRROR_FILTERED scan will not work properly.
 *
 * This is typically used to remove any partial syncs when upgrading a
 * slave to a master.  It can theoretically also be used to rollback
 * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN
 * PRUNED, and to points that are older only if they are on a retained
 * (pruning softlink) boundary.
 *
 * Rollbacks destroy information.  If you don't mind inode numbers
 * changing, a better way would be to cpdup a snapshot back onto the
 * master.
 */
static
int
hammer_pfs_rollback(hammer_transaction_t trans,
		    hammer_pseudofs_inmem_t pfsm,
		    hammer_tid_t trunc_tid)
{
	struct hammer_cmirror cmirror;
	struct hammer_cursor cursor;
	struct hammer_base_elm key_cur;
	int error;
	int seq;

	bzero(&cmirror, sizeof(cmirror));
	bzero(&key_cur, sizeof(key_cur));
	key_cur.localization = HAMMER_MIN_LOCALIZATION | pfsm->localization;
	key_cur.obj_id = HAMMER_MIN_OBJID;
	key_cur.key = HAMMER_MIN_KEY;
	key_cur.create_tid = 1;
	key_cur.rec_type = HAMMER_MIN_RECTYPE;

	seq = trans->hmp->flusher.done;

retry:
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		goto failed;
	}
	cursor.key_beg = key_cur;
	cursor.key_end.localization = HAMMER_MAX_LOCALIZATION |
				      pfsm->localization;
	cursor.key_end.obj_id = HAMMER_MAX_OBJID;
	cursor.key_end.key = HAMMER_MAX_KEY;
	cursor.key_end.create_tid = HAMMER_MAX_TID;
	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
	cursor.flags |= HAMMER_CURSOR_BACKEND;

	/*
	 * Do an optimized scan of only records created or modified
	 * >= trunc_tid, so we can fix up those records.  We must
	 * still check the TIDs but this greatly reduces the size of
	 * the scan.
	 */
	cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
	cursor.cmirror = &cmirror;
	cmirror.mirror_tid = trunc_tid;

	error = hammer_btree_first(&cursor);
	while (error == 0) {
		/*
		 * Abort the rollback if the user signals the process
		 * (e.g. with ^C).
		 */
		error = hammer_signal_check(trans->hmp);
		if (error)
			break;

		/*
		 * We only care about leafs.  Internal nodes can be returned
		 * in mirror-filtered mode (they are used to generate SKIP
		 * mrecords), but we don't need them for this code.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		cursor.flags |= HAMMER_CURSOR_ATEDISK;
		if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) {
			key_cur = cursor.node->ondisk->elms[cursor.index].base;
			error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid);
		}

		while (hammer_flusher_meta_halflimit(trans->hmp) ||
		       hammer_flusher_undo_exhausted(trans, 2)) {
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async_one(trans->hmp);
		}

		if (error == 0)
			error = hammer_btree_iterate(&cursor);
	}
	if (error == ENOENT)
		error = 0;
	hammer_done_cursor(&cursor);
	if (error == EDEADLK)
		goto retry;
failed:
	return(error);
}
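The unlock/wait/relock dance against the flusher in the loop above recurs verbatim in several of the examples below. A hypothetical helper capturing the pattern (a sketch only, not part of HAMMER):

/*
 * Hypothetical helper: throttle a long B-Tree scan while the flusher's
 * meta-data or UNDO space is running low.  The cursor must be unlocked
 * across the wait; see the warnings in hammer_unlock_cursor().
 */
static void
sketch_flush_backpressure(hammer_transaction_t trans,
			  struct hammer_cursor *cursor, int *seqp)
{
	while (hammer_flusher_meta_halflimit(trans->hmp) ||
	       hammer_flusher_undo_exhausted(trans, 2)) {
		hammer_unlock_cursor(cursor);
		hammer_flusher_wait(trans->hmp, *seqp);
		hammer_lock_cursor(cursor);
		*seqp = hammer_flusher_async_one(trans->hmp);
	}
}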
Example No. 5
/* corresponds to hammer_vop_readdir */
int hammerfs_readdir(struct file *file, void *dirent, filldir_t filldir)
{
	struct dentry *de = file->f_dentry;
	struct hammer_transaction trans;
	struct hammer_cursor cursor;
	struct hammer_inode *ip = (struct hammer_inode *)de->d_inode->i_private;
	hammer_base_elm_t base;
	int r = 0;
	int error;
	int dtype;

	printk(KERN_INFO "hammerfs_readdir(file->f_pos=%lld)\n", file->f_pos);

	/*
	 * Handle artificial entries
	 */

	if (file->f_pos == 0)
		r = filldir(dirent, ".", 1, file->f_pos++,
				de->d_inode->i_ino, DT_DIR);

	if (!r && file->f_pos == 1) {
		if (de->d_parent->d_inode)
			r = filldir(dirent, "..", 2, file->f_pos++,
					de->d_parent->d_inode->i_ino,
					DT_DIR);
		else
			r = filldir(dirent, "..", 2, file->f_pos++,
					de->d_inode->i_ino,
					DT_DIR);
	}

	if (!r) {
		hammer_simple_transaction(&trans, ip->hmp);

		/*
		 * Key range (begin and end inclusive) to scan.  Directory
		 * keys directly translate to a 64-bit 'seek' position.
		 */
		hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);
		cursor.key_beg.localization = ip->obj_localization +
					HAMMER_LOCALIZE_MISC;
		cursor.key_beg.obj_id = ip->obj_id;
		cursor.key_beg.create_tid = 0;
		cursor.key_beg.delete_tid = 0;
		cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
		cursor.key_beg.obj_type = 0;
		cursor.key_beg.key = file->f_pos;

		cursor.key_end = cursor.key_beg;
		cursor.key_end.key = HAMMER_MAX_KEY;
		cursor.asof = ip->obj_asof;
		cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE |
				HAMMER_CURSOR_ASOF;

		error = hammer_ip_first(&cursor);

		while (error == 0) {
			error = hammer_ip_resolve_data(&cursor);
			if (error)
				break;
			base = &cursor.leaf->base;
			KKASSERT(cursor.leaf->data_len > HAMMER_ENTRY_NAME_OFF);

			if (base->obj_id != de->d_inode->i_ino)
				panic("readdir: bad record at %p",
					cursor.node);

			/*
			 * Convert pseudo-filesystems into softlinks
			 */
			dtype = hammerfs_get_itype(cursor.leaf->base.obj_type);
			r = filldir(dirent, (void *)cursor.data->entry.name,
				cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF,
				file->f_pos, cursor.data->entry.obj_id, dtype);

			if (r)
				break;
			file->f_pos++;
			error = hammer_ip_next(&cursor);
		}

		hammer_done_cursor(&cursor);
	}

	/* TODO: call hammer_done_transaction(&trans) and propagate errors */

	return 0;
}
Example No. 6
/* corresponds to hammer_vop_nresolve */
struct dentry *hammerfs_inode_lookup(struct inode *parent_inode,
					struct dentry *dentry,
					struct nameidata *nameidata)
{
	struct hammer_transaction trans;
	struct super_block *sb;
	struct inode *inode;
	hammer_inode_t dip;
	hammer_inode_t ip;
	hammer_tid_t asof;
	struct hammer_cursor cursor;
	int64_t namekey;
	u_int32_t max_iterations;
	int64_t obj_id;
	int nlen;
	int flags;
	int error;
	u_int32_t localization;

	printk(KERN_INFO "hammerfs_inode_lookup(parent_inode->i_ino=%lu, dentry->d_name.name=%s)\n",
		parent_inode->i_ino, dentry->d_name.name);

	sb = parent_inode->i_sb;
	dip = (hammer_inode_t)parent_inode->i_private;
	asof = dip->obj_asof;
	localization = dip->obj_localization;   /* for code consistency */
	nlen = dentry->d_name.len;
	flags = dip->flags & HAMMER_INODE_RO;

	hammer_simple_transaction(&trans, dip->hmp);

	/*
	 * Calculate the namekey and setup the key range for the scan.  This
	 * works kinda like a chained hash table where the lower 32 bits
	 * of the namekey synthesize the chain.
	 *
	 * The key range is inclusive of both key_beg and key_end.
	 */
	namekey = hammer_directory_namekey(dip, dentry->d_name.name, nlen,
						&max_iterations);

	error = hammer_init_cursor(&trans, &cursor, &dip->cache[1], dip);
	cursor.key_beg.localization = dip->obj_localization +
			HAMMER_LOCALIZE_MISC;
	cursor.key_beg.obj_id = dip->obj_id;
	cursor.key_beg.key = namekey;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
	cursor.key_beg.obj_type = 0;

	cursor.key_end = cursor.key_beg;
	cursor.key_end.key += max_iterations;
	cursor.asof = asof;
	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

	/*
	 * Scan all matching records (the chain), locate the one matching
	 * the requested path component.
	 *
	 * The hammer_ip_*() functions merge in-memory records with on-disk
	 * records for the purposes of the search.
	 */
	obj_id = 0;
	localization = HAMMER_DEF_LOCALIZATION;

	if (error == 0) {
		error = hammer_ip_first(&cursor);
		while (error == 0) {
			error = hammer_ip_resolve_data(&cursor);
			if (error)
				break;
			if (nlen == cursor.leaf->data_len -
				HAMMER_ENTRY_NAME_OFF &&
				bcmp(dentry->d_name.name,
				cursor.data->entry.name, nlen) == 0) {
				obj_id = cursor.data->entry.obj_id;
				localization = cursor.data->entry.localization;
				break;
			}
			error = hammer_ip_next(&cursor);
		}
	}
	hammer_done_cursor(&cursor);
	if (error == 0) {
		ip = hammer_get_inode(&trans, dip, obj_id,
				asof, localization,
				flags, &error);
		if (error == 0) {
			error = hammerfs_get_inode(sb, ip, &inode);
			/* hammer_rel_inode(ip, 0); */
		} else {
			ip = NULL;
		}
		if (error == 0)
			d_add(dentry, inode);
	}
	/* TODO: call hammer_done_transaction(&trans) */
	return NULL;
}
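The inline name comparison in the scan loop above reads more clearly as a predicate: an entry matches only if its name length (data_len minus the fixed portion of the direntry record) and its bytes both match. A hypothetical refactoring:

static int
sketch_names_match(struct hammer_cursor *cursor, const char *name, int nlen)
{
	/* entry name length = data_len minus the fixed record header */
	if (nlen != cursor->leaf->data_len - HAMMER_ENTRY_NAME_OFF)
		return (0);
	/* hash-chain collisions are resolved by an exact byte compare */
	return (bcmp(name, cursor->data->entry.name, nlen) == 0);
}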
Example No. 7
/*
 * Set the PFS hammer cleanup utility config record.  This is distinct
 * from (and newer than) the PFS config.
 *
 * This is kind of a hack.
 */
static
int
hammer_ioc_set_config(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_config *config)
{
	struct hammer_btree_leaf_elm leaf;
	struct hammer_cursor cursor;
	hammer_mount_t hmp = ip->hmp;
	int error;

again:
	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		return(error);
	}

	bzero(&leaf, sizeof(leaf));
	leaf.base.obj_id = HAMMER_OBJID_ROOT;
	leaf.base.rec_type = HAMMER_RECTYPE_CONFIG;
	leaf.base.create_tid = hammer_alloc_tid(hmp, 1);
	leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
	leaf.base.localization = ip->obj_localization | HAMMER_LOCALIZE_INODE;
	leaf.base.key = 0;	/* page 0 */
	leaf.data_len = sizeof(struct hammer_config_data);

	cursor.key_beg = leaf.base;

	cursor.asof = HAMMER_MAX_TID;
	cursor.flags |= HAMMER_CURSOR_BACKEND | HAMMER_CURSOR_ASOF;

	error = hammer_btree_lookup(&cursor);
	if (error == 0) {
		error = hammer_btree_extract_data(&cursor);
		if (error == 0) {
			error = hammer_delete_at_cursor(&cursor,
							HAMMER_DELETE_DESTROY,
							0, 0, 0, NULL);
		}
		if (error == EDEADLK) {
			hammer_done_cursor(&cursor);
			goto again;
		}
	}
	if (error == ENOENT)
		error = 0;
	if (error == 0) {
		/*
		 * NOTE: Must reload key_beg after an ASOF search because
		 *	 the create_tid may have been modified during the
		 *	 search.
		 */
		cursor.flags &= ~HAMMER_CURSOR_ASOF;
		cursor.key_beg = leaf.base;
		error = hammer_create_at_cursor(&cursor, &leaf,
						&config->config,
						HAMMER_CREATE_MODE_SYS);
		if (error == EDEADLK) {
			hammer_done_cursor(&cursor);
			goto again;
		}
	}
	config->head.error = error;
	hammer_done_cursor(&cursor);
	return(0);
}
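Both the delete and the create above restart the whole operation on EDEADLK because a deadlocked cursor abandons its node locks and must be torn down and re-initialized. The general shape of that retry idiom, as a hedged sketch (the wrapper itself is hypothetical):

static int
sketch_retry_op(hammer_transaction_t trans, hammer_inode_t ip,
		int (*op)(struct hammer_cursor *cursor))
{
	struct hammer_cursor cursor;
	int error;

	do {
		error = hammer_init_cursor(trans, &cursor,
					   &ip->cache[0], NULL);
		if (error == 0)
			error = op(&cursor);
		hammer_done_cursor(&cursor);
	} while (error == EDEADLK);
	return (error);
}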
Example No. 8
// corresponds to hammer_vop_strategy_read
int hammerfs_readpage(struct file *file, struct page *page) 
{
    void *page_addr;
    hammer_mount_t hmp;
    struct buffer_head *bh;
    struct super_block *sb;
    struct hammer_transaction trans;
    struct hammer_cursor cursor;
    struct inode *inode;
    struct hammer_inode *ip;
    hammer_base_elm_t base;
    hammer_off_t disk_offset;
    int64_t rec_offset;
    int64_t file_offset;
    int error = 0;
    int boff;
    int roff;
    int n;
    int block_num;
    int block_offset;
    int bytes_read;
    int64_t sb_offset;
    hammer_off_t zone2_offset;
    int vol_no;
    hammer_volume_t volume;

    printk ("hammerfs_readpage(page->index=%d)\n", (int) page->index);

    inode = file->f_path.dentry->d_inode;
    ip = (struct hammer_inode *)inode->i_private;
    sb = inode->i_sb;
    hmp = (hammer_mount_t)sb->s_fs_info;
    hammer_simple_transaction(&trans, ip->hmp);
    hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);
    file_offset = page->index * PAGE_SIZE;

    if (file_offset > inode->i_size) {
        error = -ENOSPC;
        goto done;
    }

    SetPageUptodate (page);
    page_addr = kmap (page);

    if(!page_addr) {
        error = -ENOMEM;
        goto failed;
    }

   /*
    * Key range (begin and end inclusive) to scan.  Note that the keys
    * stored in the actual records represent BASE+LEN, not BASE.  The
    * first record containing bio_offset will have a key > bio_offset.
    */
    cursor.key_beg.localization = ip->obj_localization +
                                  HAMMER_LOCALIZE_MISC;
    cursor.key_beg.obj_id = ip->obj_id;
    cursor.key_beg.create_tid = 0;
    cursor.key_beg.delete_tid = 0;
    cursor.key_beg.obj_type = 0;
    cursor.key_beg.key = file_offset + 1;
    cursor.asof = ip->obj_asof;
    cursor.flags |= HAMMER_CURSOR_ASOF;

    cursor.key_end = cursor.key_beg;
    KKASSERT(ip->ino_data.obj_type == HAMMER_OBJTYPE_REGFILE);

    cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
    cursor.key_end.rec_type = HAMMER_RECTYPE_DATA;
    cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
    cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;

    error = hammer_ip_first(&cursor);
    boff = 0;

    while(error == 0) {
       /*
        * Get the base file offset of the record.  The key for
        * data records is (base + bytes) rather than (base).
        */
        base = &cursor.leaf->base;
        rec_offset = base->key - cursor.leaf->data_len;

       /*
        * Calculate the gap, if any, and zero-fill it.
        *
        * n is the offset of the start of the record versus our
        * current seek offset in the bio.
        */
        n = (int)(rec_offset - (file_offset + boff));
        if (n > 0) {
            if (n > PAGE_SIZE - boff)
                n = PAGE_SIZE - boff;
            bzero((char *)page_addr + boff, n);
            boff += n;
            n = 0;
        }

       /*
        * Calculate the data offset in the record and the number
        * of bytes we can copy.
        *
        * There are two degenerate cases.  First, boff may already
        * be at bp->b_bufsize.  Secondly, the data offset within
        * the record may exceed the record's size.
        */
        roff = -n;
        rec_offset += roff;
        n = cursor.leaf->data_len - roff;
        if (n <= 0) {
            printk("hammerfs_readpage: bad n=%d roff=%d\n", n, roff);
            n = 0;
        } else if (n > PAGE_SIZE - boff) {
            n = PAGE_SIZE - boff;
        }

       /*
        * Deal with cached truncations.  This cool bit of code
        * allows truncate()/ftruncate() to avoid having to sync
        * the file.
        *
        * If the frontend is truncated then all backend records are
        * subject to the frontend's truncation.
        *
        * If the backend is truncated then backend records on-disk
        * (but not in-memory) are subject to the backend's
        * truncation.  In-memory records owned by the backend
        * represent data written after the truncation point on the
        * backend and must not be truncated.
        *
        * Truncate operations deal with frontend buffer cache
        * buffers and frontend-owned in-memory records synchronously.
        */
       if (ip->flags & HAMMER_INODE_TRUNCATED) {
               if (hammer_cursor_ondisk(&cursor) ||
                   cursor.iprec->flush_state == HAMMER_FST_FLUSH) {
                       if (ip->trunc_off <= rec_offset)
                               n = 0;
                       else if (ip->trunc_off < rec_offset + n)
                               n = (int)(ip->trunc_off - rec_offset);
               }
       }
       if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
               if (hammer_cursor_ondisk(&cursor)) {
                       if (ip->sync_trunc_off <= rec_offset)
                               n = 0;
                       else if (ip->sync_trunc_off < rec_offset + n)
                               n = (int)(ip->sync_trunc_off - rec_offset);
               }
       }

       /*
        * Calculate the on-disk offset of the remaining data payload.
        */
        disk_offset = cursor.leaf->data_offset + roff;

        // move this to hammerfs_direct_io_read
        zone2_offset = hammer_blockmap_lookup(hmp, disk_offset, &error);
        vol_no = HAMMER_VOL_DECODE(zone2_offset);
        volume = hammer_get_volume(hmp, vol_no, &error);

        // n is the number of bytes we should read, sb_offset the
        // offset on disk
        sb_offset = volume->ondisk->vol_buf_beg + (zone2_offset & HAMMER_OFF_SHORT_MASK);

        while(n > 0 && boff != PAGE_SIZE) {
            block_num = sb_offset / BLOCK_SIZE;
            block_offset = sb_offset % BLOCK_SIZE;

            // the minimum of what the block still holds, what the page
            // can take, and what remains of the record
            bytes_read = min(BLOCK_SIZE - (int )block_offset, PAGE_SIZE - (int )boff);
            if (bytes_read > n)
                bytes_read = n;

            bh = sb_bread(sb, block_num);
            if(!bh) {
                hammer_rel_volume(volume, 0);
                error = -EIO;
                goto failed;
            }
            memcpy((char *)page_addr + boff, (char *)bh->b_data + block_offset, bytes_read);
            brelse(bh);

            sb_offset += bytes_read;
            n -= bytes_read;
            boff += bytes_read;
            roff += bytes_read;
        }
        hammer_rel_volume(volume, 0);

       /*
        * Iterate until we have filled the request.
        */
        if (boff == PAGE_SIZE)
            break;
        error = hammer_ip_next(&cursor);
    }

    hammer_done_cursor(&cursor);
    hammer_done_transaction(&trans);

failed:
    kunmap (page);
done:
    if (PageLocked (page))
        unlock_page (page);
    return error;
}
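The offset arithmetic in the scan loop above is easier to follow with concrete numbers. A worked example (all values hypothetical):

/*
 * Suppose file_offset = 8192, boff = 0, and the cursor returns a data
 * record with base->key = 10240 and data_len = 4096:
 *
 *   rec_offset = 10240 - 4096 = 6144   record covers [6144, 10240)
 *   n = 6144 - (8192 + 0)     = -2048  record starts BEFORE our offset
 *   roff = -n                 = 2048   skip 2048 bytes into the record
 *   n = 4096 - 2048           = 2048   payload bytes left to copy
 *
 * Had n come out positive instead, that many bytes would be a hole and
 * would be bzero()'d into the page before any copying.
 */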
Example No. 9
/*
 * Delete snapshot transaction id(s) from the list of snapshots.
 */
static
int
hammer_ioc_del_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_snapshot *snap)
{
	hammer_mount_t hmp = ip->hmp;
	struct hammer_cursor cursor;
	int error;

	/*
	 * Validate structure
	 */
	if (snap->count > HAMMER_SNAPS_PER_IOCTL)
		return (EINVAL);
	if (snap->index >= snap->count)
		return (EINVAL);

	hammer_lock_ex(&hmp->snapshot_lock);
again:
	/*
	 * Look up each requested snapshot TID and destroy its record,
	 * restarting the scan from scratch if we deadlock.
	 */
	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		return(error);
	}

	cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_SNAPSHOT;
	cursor.key_beg.localization = ip->obj_localization | HAMMER_LOCALIZE_INODE;
	cursor.asof = HAMMER_MAX_TID;
	cursor.flags |= HAMMER_CURSOR_ASOF;

	while (snap->index < snap->count) {
		cursor.key_beg.key = (int64_t)snap->snaps[snap->index].tid;
		error = hammer_btree_lookup(&cursor);
		if (error)
			break;
		error = hammer_btree_extract_leaf(&cursor);
		if (error)
			break;
		error = hammer_delete_at_cursor(&cursor, HAMMER_DELETE_DESTROY,
						0, 0, 0, NULL);
		if (error == EDEADLK) {
			hammer_done_cursor(&cursor);
			goto again;
		}
		if (error)
			break;
		++snap->index;
	}
	snap->head.error = error;
	hammer_done_cursor(&cursor);
	hammer_unlock(&hmp->snapshot_lock);
	return(0);
}
Example No. 10
/*
 * Add snapshot transaction id(s) to the list of snapshots.
 *
 * NOTE: Records are created with an allocated TID.  If a flush cycle
 *	 is in progress the record may be synced in the current flush
 *	 cycle and the volume header will reflect the allocation of the
 *	 TID, but the synchronization point may not catch up to the
 *	 TID until the next flush cycle.
 */
static
int
hammer_ioc_add_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_snapshot *snap)
{
	hammer_mount_t hmp = ip->hmp;
	struct hammer_btree_leaf_elm leaf;
	struct hammer_cursor cursor;
	int error;

	/*
	 * Validate structure
	 */
	if (snap->count > HAMMER_SNAPS_PER_IOCTL)
		return (EINVAL);
	if (snap->index >= snap->count)
		return (EINVAL);

	hammer_lock_ex(&hmp->snapshot_lock);
again:
	/*
	 * Insert a record for each requested snapshot TID, restarting
	 * from scratch if we deadlock.
	 */
	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		return(error);
	}

	cursor.asof = HAMMER_MAX_TID;
	cursor.flags |= HAMMER_CURSOR_BACKEND | HAMMER_CURSOR_ASOF;

	bzero(&leaf, sizeof(leaf));
	leaf.base.obj_id = HAMMER_OBJID_ROOT;
	leaf.base.rec_type = HAMMER_RECTYPE_SNAPSHOT;
	leaf.base.create_tid = hammer_alloc_tid(hmp, 1);
	leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
	leaf.base.localization = ip->obj_localization | HAMMER_LOCALIZE_INODE;
	leaf.data_len = sizeof(struct hammer_snapshot_data);

	while (snap->index < snap->count) {
		leaf.base.key = (int64_t)snap->snaps[snap->index].tid;
		cursor.key_beg = leaf.base;
		error = hammer_btree_lookup(&cursor);
		if (error == 0) {
			error = EEXIST;
			break;
		}

		/*
		 * NOTE: Must reload key_beg after an ASOF search because
		 *	 the create_tid may have been modified during the
		 *	 search.
		 */
		cursor.flags &= ~HAMMER_CURSOR_ASOF;
		cursor.key_beg = leaf.base;
		error = hammer_create_at_cursor(&cursor, &leaf,
						&snap->snaps[snap->index],
						HAMMER_CREATE_MODE_SYS);
		if (error == EDEADLK) {
			hammer_done_cursor(&cursor);
			goto again;
		}
		cursor.flags |= HAMMER_CURSOR_ASOF;
		if (error)
			break;
		++snap->index;
	}
	snap->head.error = error;
	hammer_done_cursor(&cursor);
	hammer_unlock(&hmp->snapshot_lock);
	return(0);
}
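A hedged userland sketch of registering a single snapshot TID through the handler above. The count/snaps[]/head.error fields mirror the handler; the request name HAMMERIOC_ADD_SNAPSHOT and the header path are assumptions.

#include <strings.h>
#include <sys/ioctl.h>
#include <vfs/hammer/hammer_ioctl.h>	/* path assumed */

static int
add_one_snapshot(int fd, hammer_tid_t tid)
{
	struct hammer_ioc_snapshot snap;

	bzero(&snap, sizeof(snap));
	snap.count = 1;			/* one TID in this batch */
	snap.snaps[0].tid = tid;
	if (ioctl(fd, HAMMERIOC_ADD_SNAPSHOT, &snap) < 0)
		return (-1);
	return (snap.head.error);	/* e.g. EEXIST if tid already there */
}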
Example No. 11
/*
 * Set version info
 */
static
int
hammer_ioc_set_version(hammer_transaction_t trans, hammer_inode_t ip,
		   struct hammer_ioc_version *ver)
{
	hammer_mount_t hmp = trans->hmp;
	struct hammer_cursor cursor;
	hammer_volume_t volume;
	int error;
	int over = hmp->version;	/* version prior to this call */

	/*
	 * Generally do not allow downgrades.  However, version 4 can
	 * be downgraded to version 3.
	 */
	if (ver->cur_version < hmp->version) {
		if (!(ver->cur_version == 3 && hmp->version == 4))
			return(EINVAL);
	}
	if (ver->cur_version == hmp->version)
		return(0);
	if (ver->cur_version > HAMMER_VOL_VERSION_MAX)
		return(EINVAL);
	if (hmp->ronly)
		return(EROFS);

	/*
	 * Update the root volume header and the version cached in
	 * the hammer_mount structure.
	 */
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error)
		goto failed;
	hammer_lock_ex(&hmp->flusher.finalize_lock);
	hammer_sync_lock_ex(trans);
	hmp->version = ver->cur_version;

	/*
	 * If upgrading from version < 4 to version >= 4 the UNDO FIFO
	 * must be reinitialized.
	 */
	if (over < HAMMER_VOL_VERSION_FOUR &&
	    ver->cur_version >= HAMMER_VOL_VERSION_FOUR) {
		hkprintf("upgrade undo to version 4\n");
		error = hammer_upgrade_undo_4(trans);
		if (error) {
			hammer_sync_unlock(trans);
			hammer_unlock(&hmp->flusher.finalize_lock);
			goto failed;
		}
	}

	/*
	 * Adjust the version in the volume header
	 */
	volume = hammer_get_root_volume(hmp, &error);
	KKASSERT(error == 0);
	hammer_modify_volume_field(cursor.trans, volume, vol_version);
	volume->ondisk->vol_version = ver->cur_version;
	hammer_modify_volume_done(volume);
	hammer_rel_volume(volume, 0);

	hammer_sync_unlock(trans);
	hammer_unlock(&hmp->flusher.finalize_lock);
failed:
	ver->head.error = error;
	hammer_done_cursor(&cursor);
	return(0);
}
Example No. 12
static
int
hammer_ioc_gethistory(hammer_transaction_t trans, hammer_inode_t ip,
		      struct hammer_ioc_history *hist)
{
	struct hammer_cursor cursor;
	hammer_btree_elm_t elm;
	int error;

	/*
	 * Validate the structure and initialize for return.
	 */
	if (hist->beg_tid > hist->end_tid)
		return(EINVAL);
	if (hist->head.flags & HAMMER_IOC_HISTORY_ATKEY) {
		if (hist->key > hist->nxt_key)
			return(EINVAL);
	}

	hist->obj_id = ip->obj_id;
	hist->count = 0;
	hist->nxt_tid = hist->end_tid;
	hist->head.flags &= ~HAMMER_IOC_HISTORY_NEXT_TID;
	hist->head.flags &= ~HAMMER_IOC_HISTORY_NEXT_KEY;
	hist->head.flags &= ~HAMMER_IOC_HISTORY_EOF;
	hist->head.flags &= ~HAMMER_IOC_HISTORY_UNSYNCED;
	if ((ip->flags & HAMMER_INODE_MODMASK) &
	    ~(HAMMER_INODE_ATIME | HAMMER_INODE_MTIME)) {
		hist->head.flags |= HAMMER_IOC_HISTORY_UNSYNCED;
	}

	/*
	 * Setup the cursor.  We can't handle undeletable records
	 * (create_tid of 0) at the moment.  A create_tid of 0 has
	 * a special meaning and cannot be specified in the cursor.
	 */
	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		return(error);
	}

	cursor.key_beg.obj_id = hist->obj_id;
	cursor.key_beg.create_tid = hist->beg_tid;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.obj_type = 0;
	if (cursor.key_beg.create_tid == HAMMER_MIN_TID)
		cursor.key_beg.create_tid = 1;

	cursor.key_end.obj_id = hist->obj_id;
	cursor.key_end.create_tid = hist->end_tid;
	cursor.key_end.delete_tid = 0;
	cursor.key_end.obj_type = 0;

	cursor.flags |= HAMMER_CURSOR_END_EXCLUSIVE;

	if (hist->head.flags & HAMMER_IOC_HISTORY_ATKEY) {
		/*
		 * key-range within the file.  For a regular file the
		 * on-disk key represents BASE+LEN, not BASE, so the
		 * first possible record containing the offset 'key'
		 * has an on-disk key of (key + 1).
		 */
		cursor.key_beg.key = hist->key;
		cursor.key_end.key = HAMMER_MAX_KEY;
		cursor.key_beg.localization = ip->obj_localization |
					      HAMMER_LOCALIZE_MISC;
		cursor.key_end.localization = ip->obj_localization |
					      HAMMER_LOCALIZE_MISC;

		switch(ip->ino_data.obj_type) {
		case HAMMER_OBJTYPE_REGFILE:
			++cursor.key_beg.key;
			cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
			break;
		case HAMMER_OBJTYPE_DIRECTORY:
			cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
			cursor.key_beg.localization = ip->obj_localization |
						hammer_dir_localization(ip);
			cursor.key_end.localization = ip->obj_localization |
						hammer_dir_localization(ip);
			break;
		case HAMMER_OBJTYPE_DBFILE:
			cursor.key_beg.rec_type = HAMMER_RECTYPE_DB;
			break;
		default:
			error = EINVAL;
			break;
		}
		cursor.key_end.rec_type = cursor.key_beg.rec_type;
	} else {
		/*
		 * The inode itself.
		 */
		cursor.key_beg.key = 0;
		cursor.key_end.key = 0;
		cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
		cursor.key_end.rec_type = HAMMER_RECTYPE_INODE;
		cursor.key_beg.localization = ip->obj_localization |
					      HAMMER_LOCALIZE_INODE;
		cursor.key_end.localization = ip->obj_localization |
					      HAMMER_LOCALIZE_INODE;
	}

	error = hammer_btree_first(&cursor);
	while (error == 0) {
		elm = &cursor.node->ondisk->elms[cursor.index];

		add_history(ip, hist, elm);
		if (hist->head.flags & (HAMMER_IOC_HISTORY_NEXT_TID |
				        HAMMER_IOC_HISTORY_NEXT_KEY |
				        HAMMER_IOC_HISTORY_EOF)) {
			break;
		}
		error = hammer_btree_iterate(&cursor);
	}
	if (error == ENOENT) {
		hist->head.flags |= HAMMER_IOC_HISTORY_EOF;
		error = 0;
	}
	hammer_done_cursor(&cursor);
	return(error);
}
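A hedged sketch of the calling convention implied by the NEXT_TID handshake above: userland re-issues the ioctl with beg_tid = nxt_tid until the flag clears. HAMMERIOC_GETHISTORY, the header path, and the hist_ary[] field name are assumptions; the rest is taken from the handler.

#include <stdint.h>
#include <stdio.h>
#include <strings.h>
#include <sys/ioctl.h>
#include <vfs/hammer/hammer_ioctl.h>	/* path assumed */

static int
dump_history(int fd, int64_t key)
{
	struct hammer_ioc_history hist;
	int i;

	bzero(&hist, sizeof(hist));
	hist.beg_tid = HAMMER_MIN_TID;
	hist.end_tid = HAMMER_MAX_TID;
	hist.key = key;
	hist.nxt_key = key + 1;		/* window of exactly one key */
	hist.head.flags |= HAMMER_IOC_HISTORY_ATKEY;

	do {
		if (ioctl(fd, HAMMERIOC_GETHISTORY, &hist) < 0)
			return (-1);
		for (i = 0; i < hist.count; ++i)
			printf("tid %016jx\n",
			       (uintmax_t)hist.hist_ary[i].tid);	/* field name assumed */
		hist.beg_tid = hist.nxt_tid;	/* resume where the scan stopped */
	} while (hist.head.flags & HAMMER_IOC_HISTORY_NEXT_TID);
	return (0);
}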
Example No. 13
int
hammer_ioc_prune(hammer_transaction_t trans, hammer_inode_t ip,
		 struct hammer_ioc_prune *prune)
{
	struct hammer_cursor cursor;
	hammer_btree_leaf_elm_t elm;
	struct hammer_ioc_prune_elm *copy_elms;
	struct hammer_ioc_prune_elm *user_elms;
	int error;
	int isdir;
	int elm_array_size;
	int seq;

	if (prune->nelms < 0 || prune->nelms > HAMMER_MAX_PRUNE_ELMS)
		return(EINVAL);
	if ((prune->key_beg.localization | prune->key_end.localization) &
	    HAMMER_LOCALIZE_PSEUDOFS_MASK) {
		return(EINVAL);
	}
	if (prune->key_beg.localization > prune->key_end.localization)
		return(EINVAL);
	if (prune->key_beg.localization == prune->key_end.localization) {
		if (prune->key_beg.obj_id > prune->key_end.obj_id)
			return(EINVAL);
		/* key-space limitations - no check needed */
	}
	if ((prune->head.flags & HAMMER_IOC_PRUNE_ALL) && prune->nelms)
		return(EINVAL);

	prune->key_cur.localization = (prune->key_end.localization &
					HAMMER_LOCALIZE_MASK) +
				      ip->obj_localization;
	prune->key_cur.obj_id = prune->key_end.obj_id;
	prune->key_cur.key = HAMMER_MAX_KEY;

	/*
	 * Copy element array from userland
	 */
	elm_array_size = sizeof(*copy_elms) * prune->nelms;
	user_elms = prune->elms;
	copy_elms = kmalloc(elm_array_size, M_TEMP, M_WAITOK);
	if ((error = copyin(user_elms, copy_elms, elm_array_size)) != 0)
		goto failed;
	prune->elms = copy_elms;

	seq = trans->hmp->flusher.done;

	/*
	 * Scan backwards.  Retries typically occur if a deadlock is detected.
	 */
retry:
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		goto failed;
	}
	cursor.key_beg.localization = (prune->key_beg.localization &
					HAMMER_LOCALIZE_MASK) +
				      ip->obj_localization;
	cursor.key_beg.obj_id = prune->key_beg.obj_id;
	cursor.key_beg.key = HAMMER_MIN_KEY;
	cursor.key_beg.create_tid = 1;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
	cursor.key_beg.obj_type = 0;

	cursor.key_end.localization = prune->key_cur.localization;
	cursor.key_end.obj_id = prune->key_cur.obj_id;
	cursor.key_end.key = prune->key_cur.key;
	cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
	cursor.key_end.delete_tid = 0;
	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
	cursor.key_end.obj_type = 0;

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
	cursor.flags |= HAMMER_CURSOR_BACKEND;

	/*
	 * This flag allows the B-Tree code to clean up loose ends.  At
	 * the moment (XXX) it also means we have to hold the sync lock
	 * through the iteration.
	 */
	cursor.flags |= HAMMER_CURSOR_PRUNING;

	hammer_sync_lock_sh(trans);
	error = hammer_btree_last(&cursor);
	hammer_sync_unlock(trans);

	while (error == 0) {
		/*
		 * Check for work
		 */
		elm = &cursor.node->ondisk->elms[cursor.index].leaf;
		prune->key_cur = elm->base;

		/*
		 * Yield to more important tasks
		 */
		if ((error = hammer_signal_check(trans->hmp)) != 0)
			break;

		if (prune->stat_oldest_tid > elm->base.create_tid)
			prune->stat_oldest_tid = elm->base.create_tid;

		if (hammer_debug_general & 0x0200) {
			kprintf("check %016llx %016llx cre=%016llx del=%016llx\n",
					(long long)elm->base.obj_id,
					(long long)elm->base.key,
					(long long)elm->base.create_tid,
					(long long)elm->base.delete_tid);
		}
				
		if (prune_should_delete(prune, elm)) {
			if (hammer_debug_general & 0x0200) {
				kprintf("check %016llx %016llx: DELETE\n",
					(long long)elm->base.obj_id,
					(long long)elm->base.key);
			}

			/*
			 * NOTE: This can return EDEADLK
			 *
			 * Acquiring the sync lock guarantees that the
			 * operation will not cross a synchronization
			 * boundary (see the flusher).
			 *
			 * We don't need to track inodes or next_tid when
			 * we are destroying deleted records.
			 */
			isdir = (elm->base.rec_type == HAMMER_RECTYPE_DIRENTRY);

			hammer_sync_lock_sh(trans);
			error = hammer_delete_at_cursor(&cursor,
							HAMMER_DELETE_DESTROY,
							cursor.trans->tid,
							cursor.trans->time32,
							0, &prune->stat_bytes);
			hammer_sync_unlock(trans);
			if (error)
				break;

			if (isdir)
				++prune->stat_dirrecords;
			else
				++prune->stat_rawrecords;

			/*
			 * The current record might now be the one after
			 * the one we deleted, set ATEDISK to force us
			 * to skip it (since we are iterating backwards).
			 */
			cursor.flags |= HAMMER_CURSOR_ATEDISK;
		} else {
			/*
			 * Nothing to delete, but we may have to check other
			 * things.
			 */
			prune_check_nlinks(&cursor, elm);
			cursor.flags |= HAMMER_CURSOR_ATEDISK;
			if (hammer_debug_general & 0x0100) {
				kprintf("check %016llx %016llx: SKIP\n",
					(long long)elm->base.obj_id,
					(long long)elm->base.key);
			}
		}
		++prune->stat_scanrecords;

		/*
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		while (hammer_flusher_meta_halflimit(trans->hmp) ||
		       hammer_flusher_undo_exhausted(trans, 2)) {
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async_one(trans->hmp);
		}
		hammer_sync_lock_sh(trans);
		error = hammer_btree_iterate_reverse(&cursor);
		hammer_sync_unlock(trans);
	}
	if (error == ENOENT)
		error = 0;
	hammer_done_cursor(&cursor);
	if (error == EDEADLK)
		goto retry;
	if (error == EINTR) {
		prune->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}
failed:
	prune->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	prune->elms = user_elms;
	kfree(copy_elms, M_TEMP);
	return(error);
}
Example No. 14
int
hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip,
		   struct hammer_ioc_reblock *reblock)
{
	struct hammer_cursor cursor;
	hammer_btree_elm_t elm;
	int checkspace_count;
	int error;
	int seq;
	int slop;

	/*
	 * A fill level <= 20% is considered an emergency.  free_level is
	 * inverted from fill_level.
	 */
	if (reblock->free_level >= HAMMER_LARGEBLOCK_SIZE * 8 / 10)
		slop = HAMMER_CHKSPC_EMERGENCY;
	else
		slop = HAMMER_CHKSPC_REBLOCK;

	if ((reblock->key_beg.localization | reblock->key_end.localization) &
	    HAMMER_LOCALIZE_PSEUDOFS_MASK) {
		return(EINVAL);
	}
	if (reblock->key_beg.obj_id >= reblock->key_end.obj_id)
		return(EINVAL);
	if (reblock->free_level < 0)
		return(EINVAL);

	reblock->key_cur = reblock->key_beg;
	reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	reblock->key_cur.localization += ip->obj_localization;

	checkspace_count = 0;
	seq = trans->hmp->flusher.done;
retry:
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		goto failed;
	}
	cursor.key_beg.localization = reblock->key_cur.localization;
	cursor.key_beg.obj_id = reblock->key_cur.obj_id;
	cursor.key_beg.key = HAMMER_MIN_KEY;
	cursor.key_beg.create_tid = 1;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
	cursor.key_beg.obj_type = 0;

	cursor.key_end.localization = (reblock->key_end.localization &
					HAMMER_LOCALIZE_MASK) +
				      ip->obj_localization;
	cursor.key_end.obj_id = reblock->key_end.obj_id;
	cursor.key_end.key = HAMMER_MAX_KEY;
	cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
	cursor.key_end.delete_tid = 0;
	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
	cursor.key_end.obj_type = 0;

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
	cursor.flags |= HAMMER_CURSOR_BACKEND;
	cursor.flags |= HAMMER_CURSOR_NOSWAPCACHE;

	/*
	 * This flag allows the btree scan code to return internal nodes,
	 * so we can reblock them in addition to the leafs.  Only specify it
	 * if we intend to reblock B-Tree nodes.
	 */
	if (reblock->head.flags & HAMMER_IOC_DO_BTREE)
		cursor.flags |= HAMMER_CURSOR_REBLOCKING;

	error = hammer_btree_first(&cursor);
	while (error == 0) {
		/*
		 * Internal or Leaf node
		 */
		KKASSERT(cursor.index < cursor.node->ondisk->count);
		elm = &cursor.node->ondisk->elms[cursor.index];
		reblock->key_cur.obj_id = elm->base.obj_id;
		reblock->key_cur.localization = elm->base.localization;

		/*
		 * Yield to more important tasks
		 */
		if ((error = hammer_signal_check(trans->hmp)) != 0)
			break;

		/*
		 * If there is insufficient free space it may be due to
		 * reserved bigblocks, which flushing might fix.
		 *
		 * We must force a retest in case the unlocked cursor is
		 * moved to the end of the leaf, or moved to an internal
		 * node.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		if (hammer_checkspace(trans->hmp, slop)) {
			if (++checkspace_count == 10) {
				error = ENOSPC;
				break;
			}
			hammer_unlock_cursor(&cursor);
			cursor.flags |= HAMMER_CURSOR_RETEST;
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async(trans->hmp, NULL);
			goto skip;
		}

		/*
		 * Acquiring the sync_lock prevents the operation from
		 * crossing a synchronization boundary.
		 *
		 * NOTE: cursor.node may have changed on return.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		hammer_sync_lock_sh(trans);
		error = hammer_reblock_helper(reblock, &cursor, elm);
		hammer_sync_unlock(trans);

		while (hammer_flusher_meta_halflimit(trans->hmp) ||
		       hammer_flusher_undo_exhausted(trans, 2)) {
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async_one(trans->hmp);
		}

		/*
		 * Setup for iteration, our cursor flags may be modified by
		 * other threads while we are unlocked.
		 */
		cursor.flags |= HAMMER_CURSOR_ATEDISK;

		/*
		 * We allocate data buffers, which at the moment we don't
		 * track dirty levels for because we allow the kernel to
		 * write them.  But if we allocate too many we can still
		 * deadlock the buffer cache.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 *	    (The cursor's node and element may change!)
		 */
		if (bd_heatup()) {
			hammer_unlock_cursor(&cursor);
			bwillwrite(HAMMER_XBUFSIZE);
			hammer_lock_cursor(&cursor);
		}
		/* XXX vm_wait_nominal(); */
skip:
		if (error == 0) {
			error = hammer_btree_iterate(&cursor);
		}
	}
	if (error == ENOENT)
		error = 0;
	hammer_done_cursor(&cursor);
	if (error == EWOULDBLOCK) {
		hammer_flusher_sync(trans->hmp);
		goto retry;
	}
	if (error == EDEADLK)
		goto retry;
	if (error == EINTR) {
		reblock->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}
failed:
	reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	return(error);
}
Example No. 15
/*
 * Copy records from userland to the target mirror.
 *
 * The PFS is identified in the mirror structure.  The passed ip is just
 * some directory in the overall HAMMER filesystem and has nothing to
 * do with the PFS.  In fact, there might not even be a root directory for
 * the PFS yet!
 */
int
hammer_ioc_mirror_write(hammer_transaction_t trans, hammer_inode_t ip,
		       struct hammer_ioc_mirror_rw *mirror)
{
	union hammer_ioc_mrecord_any mrec;
	struct hammer_cursor cursor;
	u_int32_t localization;
	int checkspace_count = 0;
	int error;
	int bytes;
	char *uptr;
	int seq;

	localization = (u_int32_t)mirror->pfs_id << 16;
	seq = trans->hmp->flusher.done;

	/*
	 * Validate the mirror structure and relocalize the tracking keys.
	 */
	if (mirror->size < 0 || mirror->size > 0x70000000)
		return(EINVAL);
	mirror->key_beg.localization &= HAMMER_LOCALIZE_MASK;
	mirror->key_beg.localization += localization;
	mirror->key_end.localization &= HAMMER_LOCALIZE_MASK;
	mirror->key_end.localization += localization;
	mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	mirror->key_cur.localization += localization;

	/*
	 * Set up our tracking cursor for the loop.  The tracking cursor
	 * is used to delete records that are no longer present on the
	 * master.  The last handled record at key_cur must be skipped.
	 */
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error == 0) {
		cursor.key_beg = mirror->key_cur;
		cursor.key_end = mirror->key_end;
		cursor.flags |= HAMMER_CURSOR_BACKEND;
		error = hammer_btree_first(&cursor);
	}
	if (error == 0)
		cursor.flags |= HAMMER_CURSOR_ATEDISK;
	if (error == ENOENT)
		error = 0;

	/*
	 * Loop until our input buffer has been exhausted.
	 */
	while (error == 0 &&
		mirror->count + sizeof(mrec.head) <= mirror->size) {

	        /*
		 * Don't blow out the buffer cache.  Leave room for frontend
		 * cache as well.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		while (hammer_flusher_meta_halflimit(trans->hmp) ||
		       hammer_flusher_undo_exhausted(trans, 2)) {
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async_one(trans->hmp);
		}

		/*
		 * If there is insufficient free space it may be due to
		 * reserved bigblocks, which flushing might fix.
		 */
		if (hammer_checkspace(trans->hmp, HAMMER_CHKSPC_MIRROR)) {
			if (++checkspace_count == 10) {
				error = ENOSPC;
				break;
			}
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async(trans->hmp, NULL);
		}


		/*
		 * Acquire and validate header
		 */
		if ((bytes = mirror->size - mirror->count) > sizeof(mrec))
			bytes = sizeof(mrec);
		uptr = (char *)mirror->ubuf + mirror->count;
		error = copyin(uptr, &mrec, bytes);
		if (error)
			break;
		if (mrec.head.signature != HAMMER_IOC_MIRROR_SIGNATURE) {
			error = EINVAL;
			break;
		}
		if (mrec.head.rec_size < sizeof(mrec.head) ||
		    mrec.head.rec_size > sizeof(mrec) + HAMMER_XBUFSIZE ||
		    mirror->count + mrec.head.rec_size > mirror->size) {
			error = EINVAL;
			break;
		}

		switch(mrec.head.type & HAMMER_MRECF_TYPE_MASK) {
		case HAMMER_MREC_TYPE_SKIP:
			if (mrec.head.rec_size != sizeof(mrec.skip))
				error = EINVAL;
			if (error == 0)
				error = hammer_ioc_mirror_write_skip(&cursor, &mrec.skip, mirror, localization);
			break;
		case HAMMER_MREC_TYPE_REC:
			if (mrec.head.rec_size < sizeof(mrec.rec))
				error = EINVAL;
			if (error == 0)
				error = hammer_ioc_mirror_write_rec(&cursor, &mrec.rec, mirror, localization, uptr + sizeof(mrec.rec));
			break;
		case HAMMER_MREC_TYPE_REC_NODATA:
		case HAMMER_MREC_TYPE_REC_BADCRC:
			/*
			 * Records with bad data payloads are ignored XXX.
			 * Records with no data payload have to be skipped
			 * (they shouldn't have been written in the first
			 * place).
			 */
			if (mrec.head.rec_size < sizeof(mrec.rec))
				error = EINVAL;
			break;
		case HAMMER_MREC_TYPE_PASS:
			if (mrec.head.rec_size != sizeof(mrec.rec))
				error = EINVAL;
			if (error == 0)
				error = hammer_ioc_mirror_write_pass(&cursor, &mrec.rec, mirror, localization);
			break;
		default:
			error = EINVAL;
			break;
		}

		/*
		 * Retry the current record on deadlock, otherwise setup
		 * for the next loop.
		 */
		if (error == EDEADLK) {
			while (error == EDEADLK) {
				hammer_sync_lock_sh(trans);
				hammer_recover_cursor(&cursor);
				error = hammer_cursor_upgrade(&cursor);
				hammer_sync_unlock(trans);
			}
		} else {
			if (error == EALREADY)
				error = 0;
			if (error == 0) {
				mirror->count += 
					HAMMER_HEAD_DOALIGN(mrec.head.rec_size);
			}
		}
	}
	hammer_done_cursor(&cursor);

	/*
	 * cumulative error 
	 */
	if (error) {
		mirror->head.flags |= HAMMER_IOC_HEAD_ERROR;
		mirror->head.error = error;
	}

	/*
	 * If the ioctl returns an error the RW data structure is not
	 * copied back out to userland, so always return 0 and report
	 * errors via mirror->head.error instead.
	 */
	return(0);
}
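The input stream consumed above is a sequence of aligned mrecords; reader and writer both advance by HAMMER_HEAD_DOALIGN(rec_size). A hedged sketch of walking such a buffer with the same validation order (illustrative only):

static int
sketch_walk_mrecs(char *buf, int size)
{
	union hammer_ioc_mrecord_any *mrec;
	int off = 0;

	while (off + (int)sizeof(mrec->head) <= size) {
		mrec = (union hammer_ioc_mrecord_any *)(buf + off);
		if (mrec->head.signature != HAMMER_IOC_MIRROR_SIGNATURE)
			return (EINVAL);
		if (mrec->head.rec_size < sizeof(mrec->head) ||
		    off + (int)mrec->head.rec_size > size)
			return (EINVAL);
		/* dispatch on mrec->head.type & HAMMER_MRECF_TYPE_MASK here */
		off += HAMMER_HEAD_DOALIGN(mrec->head.rec_size);
	}
	return (0);
}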
Example No. 16
/*
 * Retrieve as many snapshot ids as possible or until the array is
 * full, starting after the last transaction id passed in.  If count
 * is 0 we retrieve starting at the beginning.
 *
 * NOTE: Because the B-Tree key field is signed but transaction ids
 *       are unsigned, the returned list will be signed-sorted instead
 *	 of unsigned-sorted.  The caller must still sort the aggregate
 *	 results.
 */
static
int
hammer_ioc_get_snapshot(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_snapshot *snap)
{
	struct hammer_cursor cursor;
	int error;

	/*
	 * Validate structure
	 */
	if (snap->index != 0)
		return (EINVAL);
	if (snap->count > HAMMER_SNAPS_PER_IOCTL)
		return (EINVAL);

	/*
	 * Look for keys starting after the previous iteration, or at
	 * the beginning if snap->count is 0.
	 */
	error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		return(error);
	}

	cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_SNAPSHOT;
	cursor.key_beg.localization = ip->obj_localization | HAMMER_LOCALIZE_INODE;
	if (snap->count == 0)
		cursor.key_beg.key = HAMMER_MIN_KEY;
	else
		cursor.key_beg.key = (int64_t)snap->snaps[snap->count - 1].tid + 1;

	cursor.key_end = cursor.key_beg;
	cursor.key_end.key = HAMMER_MAX_KEY;
	cursor.asof = HAMMER_MAX_TID;
	cursor.flags |= HAMMER_CURSOR_END_EXCLUSIVE | HAMMER_CURSOR_ASOF;

	snap->count = 0;

	error = hammer_btree_first(&cursor);
	while (error == 0 && snap->count < HAMMER_SNAPS_PER_IOCTL) {
		error = hammer_btree_extract_leaf(&cursor);
		if (error)
			break;
		if (cursor.leaf->base.rec_type == HAMMER_RECTYPE_SNAPSHOT) {
			error = hammer_btree_extract_data(&cursor);
			if (error)
				break;
			snap->snaps[snap->count] = cursor.data->snap;

			/*
			 * The snap data tid should match the key but might
			 * not due to a bug in the HAMMER v3 conversion code.
			 *
			 * This error will work itself out over time but we
			 * have to force a match or the snapshot will not
			 * be deletable.
			 */
			if (cursor.data->snap.tid !=
			    (hammer_tid_t)cursor.leaf->base.key) {
				hkprintf("lo=%08x snapshot key "
					"0x%016jx data mismatch 0x%016jx\n",
					cursor.key_beg.localization,
					(uintmax_t)cursor.data->snap.tid,
					cursor.leaf->base.key);
				hkprintf("Probably left over from the "
					"original v3 conversion, hammer "
					"cleanup should get it eventually\n");
				snap->snaps[snap->count].tid =
					cursor.leaf->base.key;
			}
			++snap->count;
		}
		error = hammer_btree_iterate(&cursor);
	}

	if (error == ENOENT) {
		snap->head.flags |= HAMMER_IOC_SNAPSHOT_EOF;
		error = 0;
	}
	snap->head.error = error;
	hammer_done_cursor(&cursor);
	return(0);
}
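The signed-sort NOTE at the top of this function deserves a concrete illustration (values hypothetical):

/*
 * The B-Tree key is an int64_t while TIDs are unsigned 64-bit values,
 * so a TID with the top bit set compares as negative and sorts first:
 *
 *   unsigned order:   0x0000000000001000 < 0x8000000000000000
 *   key order:        0x8000000000000000 < 0x0000000000001000
 *
 * In practice TIDs stay far below 2^63, but a caller aggregating
 * multiple batches must still re-sort, as the NOTE says.
 */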
Example No. 17
/*
 * All B-Tree records within the specified key range which also conform
 * to the transaction id range are returned.  Mirroring code keeps track
 * of the last transaction id fully scanned and can efficiently pick up
 * where it left off if interrupted.
 *
 * The PFS is identified in the mirror structure.  The passed ip is just
 * some directory in the overall HAMMER filesystem and has nothing to
 * do with the PFS.
 */
int
hammer_ioc_mirror_read(hammer_transaction_t trans, hammer_inode_t ip,
		       struct hammer_ioc_mirror_rw *mirror)
{
	struct hammer_cmirror cmirror;
	struct hammer_cursor cursor;
	union hammer_ioc_mrecord_any mrec;
	hammer_btree_leaf_elm_t elm;
	const int crc_start = HAMMER_MREC_CRCOFF;
	char *uptr;
	int error;
	int data_len;
	int bytes;
	int eatdisk;
	int mrec_flags;
	u_int32_t localization;
	u_int32_t rec_crc;

	localization = (u_int32_t)mirror->pfs_id << 16;

	if ((mirror->key_beg.localization | mirror->key_end.localization) &
	    HAMMER_LOCALIZE_PSEUDOFS_MASK) {
		return(EINVAL);
	}
	if (hammer_btree_cmp(&mirror->key_beg, &mirror->key_end) > 0)
		return(EINVAL);

	mirror->key_cur = mirror->key_beg;
	mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	mirror->key_cur.localization += localization;
	bzero(&mrec, sizeof(mrec));
	bzero(&cmirror, sizeof(cmirror));

	/*
	 * Make CRC errors non-fatal (at least on data), causing an EDOM
	 * error instead of EIO.
	 */
	trans->flags |= HAMMER_TRANSF_CRCDOM;

retry:
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		goto failed;
	}
	cursor.key_beg = mirror->key_cur;
	cursor.key_end = mirror->key_end;
	cursor.key_end.localization &= HAMMER_LOCALIZE_MASK;
	cursor.key_end.localization += localization;

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
	cursor.flags |= HAMMER_CURSOR_BACKEND;

	/*
	 * This flag filters the search to only return elements whose create
	 * or delete TID is >= mirror_tid.  The B-Tree uses the mirror_tid
	 * field stored with internal and leaf nodes to shortcut the scan.
	 */
	cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
	cursor.cmirror = &cmirror;
	cmirror.mirror_tid = mirror->tid_beg;

	error = hammer_btree_first(&cursor);
	while (error == 0) {
		/*
		 * Yield to more important tasks
		 */
		error = hammer_signal_check(trans->hmp);
		if (error)
			break;

		/*
		 * An internal node can be returned in mirror-filtered
		 * mode and indicates that the scan is returning a skip
		 * range in the cursor->cmirror structure.
		 */
		uptr = (char *)mirror->ubuf + mirror->count;
		if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL) {
			/*
			 * Check space
			 */
			mirror->key_cur = cmirror.skip_beg;
			bytes = sizeof(mrec.skip);
			if (mirror->count + HAMMER_HEAD_DOALIGN(bytes) >
			    mirror->size) {
				break;
			}

			/*
			 * Fill mrec
			 */
			mrec.head.signature = HAMMER_IOC_MIRROR_SIGNATURE;
			mrec.head.type = HAMMER_MREC_TYPE_SKIP;
			mrec.head.rec_size = bytes;
			mrec.skip.skip_beg = cmirror.skip_beg;
			mrec.skip.skip_end = cmirror.skip_end;
			mrec.head.rec_crc = crc32(&mrec.head.rec_size,
						 bytes - crc_start);
			error = copyout(&mrec, uptr, bytes);
			eatdisk = 0;
			goto didwrite;
		}

		/*
		 * Leaf node.  In full-history mode we could filter out
		 * elements modified outside the user-requested TID range.
		 *
		 * However, such elements must be returned so the writer
		 * can compare them against the target to determine what
		 * needs to be deleted on the target, particularly for
		 * no-history mirrors.
		 */
		KKASSERT(cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF);
		elm = &cursor.node->ondisk->elms[cursor.index].leaf;
		mirror->key_cur = elm->base;

		/*
		 * If the record was created after our end point we just
		 * ignore it.
		 */
		if (elm->base.create_tid > mirror->tid_end) {
			error = 0;
			bytes = 0;
			eatdisk = 1;
			goto didwrite;
		}

		/*
		 * Determine if we should generate a PASS or a REC.  PASS
		 * records are records without any data payload.  Such
		 * records will be generated if the target is already expected
		 * to have the record, allowing it to delete the gaps.
		 *
		 * A PASS record is also used to perform deletions on the
		 * target.
		 *
		 * Such deletions are needed if the master or files on the
		 * master are no-history, or if the slave is so far behind
		 * the master has already been pruned.
		 */
		if (elm->base.create_tid < mirror->tid_beg) {
			bytes = sizeof(mrec.rec);
			if (mirror->count + HAMMER_HEAD_DOALIGN(bytes) >
			    mirror->size) {
				break;
			}

			/*
			 * Fill mrec.
			 */
			mrec.head.signature = HAMMER_IOC_MIRROR_SIGNATURE;
			mrec.head.type = HAMMER_MREC_TYPE_PASS;
			mrec.head.rec_size = bytes;
			mrec.rec.leaf = *elm;
			mrec.head.rec_crc = crc32(&mrec.head.rec_size,
						 bytes - crc_start);
			error = copyout(&mrec, uptr, bytes);
			eatdisk = 1;
			goto didwrite;
			
		}

		/*
		 * The core code exports the data to userland.
		 *
		 * CRC errors on data are reported but passed through,
		 * but the data must be washed by the user program.
		 *
		 * If userland just wants the btree records it can
		 * request that bulk data not be returned.  This is
		 * use during mirror-stream histogram generation.
		 */
		mrec_flags = 0;
		data_len = (elm->data_offset) ? elm->data_len : 0;
		if (data_len &&
		    (mirror->head.flags & HAMMER_IOC_MIRROR_NODATA)) {
			data_len = 0;
			mrec_flags |= HAMMER_MRECF_NODATA;
		}
		if (data_len) {
			error = hammer_btree_extract(&cursor,
						     HAMMER_CURSOR_GET_DATA);
			if (error) {
				if (error != EDOM)
					break;
				mrec_flags |= HAMMER_MRECF_CRC_ERROR |
					      HAMMER_MRECF_DATA_CRC_BAD;
			}
		}

		bytes = sizeof(mrec.rec) + data_len;
		if (mirror->count + HAMMER_HEAD_DOALIGN(bytes) > mirror->size)
			break;

		/*
		 * Construct the record for userland and copyout.
		 *
		 * The user is asking for a snapshot, if the record was
		 * deleted beyond the user-requested ending tid, the record
		 * is not considered deleted from the point of view of
		 * userland and delete_tid is cleared.
		 */
		mrec.head.signature = HAMMER_IOC_MIRROR_SIGNATURE;
		mrec.head.type = HAMMER_MREC_TYPE_REC | mrec_flags;
		mrec.head.rec_size = bytes;
		mrec.rec.leaf = *elm;

		if (elm->base.delete_tid > mirror->tid_end)
			mrec.rec.leaf.base.delete_tid = 0;
		rec_crc = crc32(&mrec.head.rec_size,
				sizeof(mrec.rec) - crc_start);
		if (data_len)
			rec_crc = crc32_ext(cursor.data, data_len, rec_crc);
		mrec.head.rec_crc = rec_crc;
		error = copyout(&mrec, uptr, sizeof(mrec.rec));
		if (data_len && error == 0) {
			error = copyout(cursor.data, uptr + sizeof(mrec.rec),
					data_len);
		}
		eatdisk = 1;

		/*
		 * eatdisk controls whether we skip the current cursor
		 * position on the next scan or not.  If doing a SKIP
		 * the cursor is already positioned properly for the next
		 * scan and eatdisk will be 0.
		 */
didwrite:
		if (error == 0) {
			mirror->count += HAMMER_HEAD_DOALIGN(bytes);
			if (eatdisk)
				cursor.flags |= HAMMER_CURSOR_ATEDISK;
			else
				cursor.flags &= ~HAMMER_CURSOR_ATEDISK;
			error = hammer_btree_iterate(&cursor);
		}
	}
	if (error == ENOENT) {
		mirror->key_cur = mirror->key_end;
		error = 0;
	}
	hammer_done_cursor(&cursor);
	if (error == EDEADLK)
		goto retry;
	if (error == EINTR) {
		mirror->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}
failed:
	mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	return(error);
}
Example No. 18
int
hammer_ioc_dedup(hammer_transaction_t trans, hammer_inode_t ip,
		 struct hammer_ioc_dedup *dedup)
{
	struct hammer_cursor cursor1, cursor2;
	int error;
	int seq;

	/*
	 * Enforce hammer filesystem version requirements
	 */
	if (trans->hmp->version < HAMMER_VOL_VERSION_FIVE) {
		kprintf("hammer: Filesystem must be upgraded to v5 "
			"before you can run dedup\n");
		return (EOPNOTSUPP);
	}

	/*
	 * Cursor1, return an error -> candidate goes to pass2 list
	 */
	error = hammer_init_cursor(trans, &cursor1, NULL, NULL);
	if (error)
		goto done_cursor;
	cursor1.key_beg = dedup->elm1;
	cursor1.flags |= HAMMER_CURSOR_BACKEND;

	error = hammer_btree_lookup(&cursor1);
	if (error)
		goto done_cursor;
	error = hammer_btree_extract(&cursor1, HAMMER_CURSOR_GET_LEAF |
						HAMMER_CURSOR_GET_DATA);
	if (error)
		goto done_cursor;

	/*
	 * Cursor2, return an error -> candidate goes to pass2 list
	 */
	error = hammer_init_cursor(trans, &cursor2, NULL, NULL);
	if (error)
		goto done_cursors;
	cursor2.key_beg = dedup->elm2;
	cursor2.flags |= HAMMER_CURSOR_BACKEND;

	error = hammer_btree_lookup(&cursor2);
	if (error)
		goto done_cursors;
	error = hammer_btree_extract(&cursor2, HAMMER_CURSOR_GET_LEAF |
						HAMMER_CURSOR_GET_DATA);
	if (error)
		goto done_cursors;

	/*
	 * Zone validation. We can't de-dup any of the other zones
	 * (BTREE or META) or bad things will happen.
	 *
	 * Return with error = 0, but set an INVALID_ZONE flag.
	 */
	error = validate_zone(cursor1.leaf->data_offset) +
			    validate_zone(cursor2.leaf->data_offset);
	if (error) {
		dedup->head.flags |= HAMMER_IOC_DEDUP_INVALID_ZONE;
		error = 0;
		goto done_cursors;
	}

	/*
	 * Comparison checks
	 *
	 * If zones don't match or data_len fields aren't the same
	 * we consider it to be a comparison failure.
	 *
	 * Return with error = 0, but set a CMP_FAILURE flag.
	 */
	if ((cursor1.leaf->data_offset & HAMMER_OFF_ZONE_MASK) !=
	    (cursor2.leaf->data_offset & HAMMER_OFF_ZONE_MASK)) {
		dedup->head.flags |= HAMMER_IOC_DEDUP_CMP_FAILURE;
		goto done_cursors;
	}
	if (cursor1.leaf->data_len != cursor2.leaf->data_len) {
		dedup->head.flags |= HAMMER_IOC_DEDUP_CMP_FAILURE;
		goto done_cursors;
	}

	/* byte-by-byte comparison to be sure */
	if (bcmp(cursor1.data, cursor2.data, cursor1.leaf->data_len)) {
		dedup->head.flags |= HAMMER_IOC_DEDUP_CMP_FAILURE;
		goto done_cursors;
	}

	/*
	 * Upgrade both cursors together to an exclusive lock
	 *
	 * Return an error -> candidate goes to pass2 list
	 */
	hammer_sync_lock_sh(trans);
	error = hammer_cursor_upgrade2(&cursor1, &cursor2);
	if (error) {
		hammer_sync_unlock(trans);
		goto done_cursors;
	}

	error = hammer_blockmap_dedup(cursor1.trans,
			cursor1.leaf->data_offset, cursor1.leaf->data_len);
	if (error) {
		if (error == ERANGE) {
			/*
			 * Return with error = 0, but set an UNDERFLOW flag
			 */
			dedup->head.flags |= HAMMER_IOC_DEDUP_UNDERFLOW;
			error = 0;
			goto downgrade_cursors;
		} else {
			/*
			 * Return an error -> block goes to pass2 list
			 */
			goto downgrade_cursors;
		}
	}

	/*
	 * The cursor2's cache must be invalidated before calling
	 * hammer_blockmap_free(), otherwise it will not be able to
	 * invalidate the underlying data buffer.
	 */
	hammer_cursor_invalidate_cache(&cursor2);
	hammer_blockmap_free(cursor2.trans,
			cursor2.leaf->data_offset, cursor2.leaf->data_len);

	hammer_modify_node(cursor2.trans, cursor2.node,
			&cursor2.leaf->data_offset, sizeof(hammer_off_t));
	cursor2.leaf->data_offset = cursor1.leaf->data_offset;
	hammer_modify_node_done(cursor2.node);

downgrade_cursors:
	hammer_cursor_downgrade2(&cursor1, &cursor2);
	hammer_sync_unlock(trans);
done_cursors:
	hammer_done_cursor(&cursor2);
done_cursor:
	hammer_done_cursor(&cursor1);

	/*
	 * Avoid deadlocking the buffer cache
	 */
	seq = trans->hmp->flusher.done;
	while (hammer_flusher_meta_halflimit(trans->hmp) ||
	       hammer_flusher_undo_exhausted(trans, 2)) {
		hammer_flusher_wait(trans->hmp, seq);
		seq = hammer_flusher_async_one(trans->hmp);
	}
	return (error);
}