Example #1
/*
 * Flush a single inode that is part of a flush group.
 *
 * Flusher errors are extremely serious; even ENOSPC shouldn't occur because
 * the front-end should have reserved sufficient space on the media.  Any
 * error other than EWOULDBLOCK will force the mount to be read-only.
 */
static
int
hammer_flusher_flush_inode(hammer_inode_t ip, void *data)
{
	hammer_flusher_info_t info = data;
	hammer_mount_t hmp = info->hmp;
	hammer_transaction_t trans = &info->trans;
	int error;

	/*
	 * Several slaves are operating on the same flush group concurrently.
	 * The SLAVEFLUSH flag prevents them from tripping over each other.
	 *
	 * NOTE: It is possible for an EWOULDBLOCK'd ip returned by one slave
	 *	 to be resynced by another, but normally such inodes are not
	 *	 revisited until the master loop gets to them.
	 */
	if (ip->flags & HAMMER_INODE_SLAVEFLUSH)
		return(0);
	ip->flags |= HAMMER_INODE_SLAVEFLUSH;
	++hammer_stats_inode_flushes;

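	/*
	 * Clean out any loose I/Os, give the VM system a chance to settle,
	 * and then sync the inode to the media.
	 */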
	hammer_flusher_clean_loose_ios(hmp);
	vm_wait_nominal();
	error = hammer_sync_inode(trans, ip);

	/*
	 * EWOULDBLOCK can happen under normal operation; all other errors
	 * are considered extremely serious.  We must set WOULDBLOCK
	 * mechanics to deal with the mess left over from the abort of the
	 * previous flush.
	 */
	if (error) {
		ip->flags |= HAMMER_INODE_WOULDBLOCK;
		if (error == EWOULDBLOCK)
			error = 0;
	}
	hammer_flush_inode_done(ip, error);
	/* ip invalid */

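	/*
	 * If a finalization is pending wait for it to complete.  Finalize
	 * early ourselves if the UNDO FIFO is nearly exhausted or if too
	 * much dirty meta-data has accumulated.
	 */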
	while (hmp->flusher.finalize_want)
		tsleep(&hmp->flusher.finalize_want, 0, "hmrsxx", 0);
	if (hammer_flusher_undo_exhausted(trans, 1)) {
		hkprintf("Warning: UNDO area too small!\n");
		hammer_flusher_finalize(trans, 1);
	} else if (hammer_flusher_meta_limit(trans->hmp)) {
		hammer_flusher_finalize(trans, 0);
	}
	return (0);
}
Example #2
static int
tmpfs_read(struct vop_read_args *ap)
{
	struct buf *bp;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct tmpfs_node *node;
	off_t base_offset;
	size_t offset;
	size_t len;
	size_t resid;
	int error;

	/*
	 * Check the basics
	 */
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (vp->v_type != VREG)
		return (EINVAL);

	/*
	 * Extract node, try to shortcut the operation through
	 * the VM page cache, allowing us to avoid buffer cache
	 * overheads.
	 */
	node = VP_TO_TMPFS_NODE(vp);
	resid = uio->uio_resid;
	error = vop_helper_read_shortcut(ap);
	if (error)
		return error;
	if (uio->uio_resid == 0) {
		if (resid)
			goto finished;
		return error;
	}

	/*
	 * Fall-through to our normal read code.
	 */
	while (uio->uio_resid > 0 && uio->uio_offset < node->tn_size) {
		/*
		 * Use buffer cache I/O (via tmpfs_strategy)
		 */
		offset = (size_t)uio->uio_offset & TMPFS_BLKMASK64;
		base_offset = (off_t)uio->uio_offset - offset;
		bp = getcacheblk(vp, base_offset, TMPFS_BLKSIZE, 0);
		if (bp == NULL) {
			error = bread(vp, base_offset, TMPFS_BLKSIZE, &bp);
			if (error) {
				brelse(bp);
				kprintf("tmpfs_read bread error %d\n", error);
				break;
			}

			/*
			 * tmpfs pretty much fiddles directly with the VM
			 * system; don't let it exhaust it or we won't play
			 * nice with other processes.
			 *
			 * Only do this if the VOP is coming from a normal
			 * read/write.  The VM system handles the case for
			 * UIO_NOCOPY.
			 */
			if (uio->uio_segflg != UIO_NOCOPY)
				vm_wait_nominal();
		}
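		/*
		 * The buffer is eligible for clustered I/O.
		 */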
		bp->b_flags |= B_CLUSTEROK;

		/*
		 * Figure out how many bytes we can actually copy this loop.
		 */
		len = TMPFS_BLKSIZE - offset;
		if (len > uio->uio_resid)
			len = uio->uio_resid;
		if (len > node->tn_size - uio->uio_offset)
			len = (size_t)(node->tn_size - uio->uio_offset);

		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
		bqrelse(bp);
		if (error) {
			kprintf("tmpfs_read uiomove error %d\n", error);
			break;
		}
	}

finished:
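	/*
	 * Note that the node was accessed, taking the node lock only when
	 * the ACCESSED flag is not already set.
	 */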
	if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
		TMPFS_NODE_LOCK(node);
		node->tn_status |= TMPFS_NODE_ACCESSED;
		TMPFS_NODE_UNLOCK(node);
	}
	return (error);
}
Example #3
int
hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip,
		   struct hammer_ioc_reblock *reblock)
{
	struct hammer_cursor cursor;
	hammer_btree_elm_t elm;
	int checkspace_count;
	int error;
	int seq;
	int slop;
	uint32_t key_end_localization;

	if ((reblock->key_beg.localization | reblock->key_end.localization) &
	    HAMMER_LOCALIZE_PSEUDOFS_MASK) {
		return(EINVAL);
	}
	if (reblock->key_beg.obj_id >= reblock->key_end.obj_id)
		return(EINVAL);
	if (reblock->free_level < 0 ||
	    reblock->free_level > HAMMER_BIGBLOCK_SIZE)
		return(EINVAL);

	/*
	 * A fill_percentage <= 20% is considered an emergency.  free_level is
	 * inverted from fill_percentage.
	 */
	if (reblock->free_level >= HAMMER_BIGBLOCK_SIZE * 8 / 10)
		slop = HAMMER_CHKSPC_EMERGENCY;
	else
		slop = HAMMER_CHKSPC_REBLOCK;

	/*
	 * The ioctl caller has only set the localization type to reblock.
	 * Initialize the cursor key localization with the inode's
	 * localization.
	 */
	reblock->key_cur = reblock->key_beg;
	reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	if (reblock->allpfs == 0)
		reblock->key_cur.localization |= ip->obj_localization;

	key_end_localization = reblock->key_end.localization;
	key_end_localization &= HAMMER_LOCALIZE_MASK;
	if (reblock->allpfs == 0)
		key_end_localization |= ip->obj_localization;
	else
		key_end_localization |= pfs_to_lo(HAMMER_MAX_PFSID);

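	/*
	 * Remember the most recently completed flusher sequence so we can
	 * wait on the flusher when space or meta-data limits are hit.
	 */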
	checkspace_count = 0;
	seq = trans->hmp->flusher.done;
retry:
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		goto failed;
	}
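	/*
	 * Scan the B-Tree over the requested object id and localization
	 * range, inclusive of the end key.
	 */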
	cursor.key_beg.localization = reblock->key_cur.localization;
	cursor.key_beg.obj_id = reblock->key_cur.obj_id;
	cursor.key_beg.key = HAMMER_MIN_KEY;
	cursor.key_beg.create_tid = 1;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
	cursor.key_beg.obj_type = 0;

	cursor.key_end.localization = key_end_localization;
	cursor.key_end.obj_id = reblock->key_end.obj_id;
	cursor.key_end.key = HAMMER_MAX_KEY;
	cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
	cursor.key_end.delete_tid = 0;
	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
	cursor.key_end.obj_type = 0;

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
	cursor.flags |= HAMMER_CURSOR_BACKEND;
	cursor.flags |= HAMMER_CURSOR_NOSWAPCACHE;

	/*
	 * This flag allows the btree scan code to return internal nodes,
	 * so we can reblock them in addition to the leaves.  Only specify it
	 * if we intend to reblock B-Tree nodes.
	 */
	if (reblock->head.flags & HAMMER_IOC_DO_BTREE)
		cursor.flags |= HAMMER_CURSOR_REBLOCKING;

	error = hammer_btree_first(&cursor);
	while (error == 0) {
		/*
		 * Internal or Leaf node
		 */
		KKASSERT(cursor.index < cursor.node->ondisk->count);
		elm = &cursor.node->ondisk->elms[cursor.index];
		reblock->key_cur.obj_id = elm->base.obj_id;
		reblock->key_cur.localization = elm->base.localization;

		/*
		 * Filesystem went read-only during reblocking
		 */
		if (trans->hmp->ronly) {
			error = EROFS;
			break;
		}

		/*
		 * Yield to more important tasks
		 */
		if ((error = hammer_signal_check(trans->hmp)) != 0)
			break;

		/*
		 * If there is insufficient free space it may be due to
		 * reserved big-blocks, which flushing might fix.
		 *
		 * We must force a retest in case the unlocked cursor is
		 * moved to the end of the leaf, or moved to an internal
		 * node.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		if (hammer_checkspace(trans->hmp, slop)) {
			if (++checkspace_count == 10) {
				error = ENOSPC;
				break;
			}
			hammer_unlock_cursor(&cursor);
			cursor.flags |= HAMMER_CURSOR_RETEST;
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async(trans->hmp, NULL);
			goto skip;
		}

		/*
		 * Acquiring the sync_lock prevents the operation from
		 * crossing a synchronization boundary.
		 *
		 * NOTE: cursor.node may have changed on return.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		hammer_sync_lock_sh(trans);
		error = hammer_reblock_helper(reblock, &cursor, elm);
		hammer_sync_unlock(trans);

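		/*
		 * If dirty meta-data buffers have built up to half their
		 * limit, or UNDO space is running low, stall and let the
		 * flusher catch up before iterating further.
		 */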
		while (hammer_flusher_meta_halflimit(trans->hmp) ||
		       hammer_flusher_undo_exhausted(trans, 2)) {
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async_one(trans->hmp);
		}

		/*
		 * Setup for iteration, our cursor flags may be modified by
		 * other threads while we are unlocked.
		 */
		cursor.flags |= HAMMER_CURSOR_ATEDISK;

		/*
		 * We allocate data buffers, which at the moment we don't track
		 * dirty levels for because we allow the kernel to write
		 * them.  But if we allocate too many we can still deadlock
		 * the buffer cache.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 *	    (The cursor's node and element may change!)
		 */
		if (bd_heatup()) {
			hammer_unlock_cursor(&cursor);
			bwillwrite(HAMMER_XBUFSIZE);
			hammer_lock_cursor(&cursor);
		}
		vm_wait_nominal();
skip:
		if (error == 0) {
			error = hammer_btree_iterate(&cursor);
		}
	}
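	/*
	 * ENOENT simply indicates the iteration reached the end of the
	 * range.  EWOULDBLOCK and EDEADLK cause a retry, while EINTR
	 * flags the ioctl as having been interrupted.
	 */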
	if (error == ENOENT)
		error = 0;
	hammer_done_cursor(&cursor);
	if (error == EWOULDBLOCK) {
		hammer_flusher_sync(trans->hmp);
		goto retry;
	}
	if (error == EDEADLK)
		goto retry;
	if (error == EINTR) {
		reblock->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}
failed:
	reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	return(error);
}