Example #1
uint64_t
dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
                 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
    uint64_t object;
    uint64_t L2_dnode_count = DNODES_PER_BLOCK <<
                              (DMU_META_DNODE(os)->dn_indblkshift - SPA_BLKPTRSHIFT);
    dnode_t *dn = NULL;
    int restarted = B_FALSE;

    mutex_enter(&os->os_obj_lock);
    for (;;) {
        object = os->os_obj_next;
        /*
         * Each time we polish off an L2 bp worth of dnodes
         * (2^13 objects), move to another L2 bp that's still
         * reasonably sparse (at most 1/4 full).  Look from the
         * beginning once, but after that keep looking from here.
         * If we can't find one, just keep going from here.
         *
         * Note that dmu_traverse depends on the behavior that we use
         * multiple blocks of the dnode object before going back to
         * reuse objects.  Any change to this algorithm should preserve
         * that property or find another solution to the issues
         * described in traverse_visitbp.
         */
        if (P2PHASE(object, L2_dnode_count) == 0) {
            uint64_t offset = restarted ? object << DNODE_SHIFT : 0;
            int error = dnode_next_offset(DMU_META_DNODE(os),
                                          DNODE_FIND_HOLE,
                                          &offset, 2, DNODES_PER_BLOCK >> 2, 0);
            restarted = B_TRUE;
            if (error == 0)
                object = offset >> DNODE_SHIFT;
        }
        os->os_obj_next = ++object;

        /*
         * XXX We should check for an i/o error here and return
         * up to our caller.  Actually we should pre-read it in
         * dmu_tx_assign(), but there is currently no mechanism
         * to do so.
         */
        (void) dnode_hold_impl(os, object, DNODE_MUST_BE_FREE,
                               FTAG, &dn);
        if (dn)
            break;

        if (dmu_object_next(os, &object, B_TRUE, 0) == 0)
            os->os_obj_next = object - 1;
    }

    dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx);
    dnode_rele(dn, FTAG);

    mutex_exit(&os->os_obj_lock);

    dmu_tx_add_new_object(tx, os, object);
    return (object);
}
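dmu_object_alloc() must be called with an already-assigned transaction; the allocation itself never waits for one. The sketch below shows the usual caller pattern, assuming an open objset_t *os; alloc_example() is a hypothetical wrapper, while the dmu_tx_*() calls, DMU_NEW_OBJECT, and the DMU_OT_* constants are the standard DMU API:

static int
alloc_example(objset_t *os, uint64_t *objp)
{
	dmu_tx_t *tx = dmu_tx_create(os);
	int error;

	/* Reserve room for the new dnode before assigning the tx. */
	dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error != 0) {
		dmu_tx_abort(tx);
		return (error);
	}

	/* blocksize 0 selects the default; no bonus buffer is requested. */
	*objp = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
	    DMU_OT_NONE, 0, tx);
	dmu_tx_commit(tx);
	return (0);
}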
Example #2
int
dmu_objset_userspace_upgrade(objset_t *os)
{
	uint64_t obj;
	int err = 0;

	if (dmu_objset_userspace_present(os))
		return (0);
	if (!dmu_objset_userused_enabled(os->os))
		return (ENOTSUP);
	if (dmu_objset_is_snapshot(os))
		return (EINVAL);

	/*
	 * We simply need to mark every object dirty, so that it will be
	 * synced out and now accounted.  If this is called
	 * concurrently, or if we already did some work before crashing,
	 * that's fine, since we track each object's accounted state
	 * independently.
	 */

	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
		dmu_tx_t *tx;
		dmu_buf_t *db;
		int objerr;

		if (issig(JUSTLOOKING) && issig(FORREAL))
			return (EINTR);

		objerr = dmu_bonus_hold(os, obj, FTAG, &db);
		if (objerr)
			continue;
		tx = dmu_tx_create(os);
		dmu_tx_hold_bonus(tx, obj);
		objerr = dmu_tx_assign(tx, TXG_WAIT);
		if (objerr) {
			dmu_tx_abort(tx);
			continue;
		}
		dmu_buf_will_dirty(db, tx);
		dmu_buf_rele(db, FTAG);
		dmu_tx_commit(tx);
	}

	os->os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
	txg_wait_synced(dmu_objset_pool(os), 0);
	return (0);
}
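The upgrade loop's termination relies on the dmu_object_next() contract: it advances *objectp past its current value to the next allocated object and returns 0, or returns an error (ESRCH) once the objset is exhausted, which is what finally ends the for loop above. A minimal iteration sketch under that assumed contract:

	uint64_t obj = 0;
	int err;

	/* Visit every allocated object number in the objset, in order. */
	while ((err = dmu_object_next(os, &obj, B_FALSE, 0)) == 0) {
		/* obj now names an allocated object; process it here. */
	}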
Example #3
uint64_t
dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
	objset_impl_t *osi = os->os;
	uint64_t object;
	uint64_t L2_dnode_count = DNODES_PER_BLOCK <<
	    (osi->os_meta_dnode->dn_indblkshift - SPA_BLKPTRSHIFT);
	dnode_t *dn = NULL;
	int restarted = B_FALSE;

	mutex_enter(&osi->os_obj_lock);
	for (;;) {
		object = osi->os_obj_next;
		/*
		 * Each time we polish off an L2 bp worth of dnodes
		 * (2^13 objects), move to another L2 bp that's still
		 * reasonably sparse (at most 1/4 full).  Look from the
		 * beginning once, but after that keep looking from here.
		 * If we can't find one, just keep going from here.
		 */
		if (P2PHASE(object, L2_dnode_count) == 0) {
			uint64_t offset = restarted ? object << DNODE_SHIFT : 0;
			int error = dnode_next_offset(osi->os_meta_dnode,
			    DNODE_FIND_HOLE,
			    &offset, 2, DNODES_PER_BLOCK >> 2, 0);
			restarted = B_TRUE;
			if (error == 0)
				object = offset >> DNODE_SHIFT;
		}
		osi->os_obj_next = ++object;

		/*
		 * XXX We should check for an i/o error here and return
		 * up to our caller.  Actually we should pre-read it in
		 * dmu_tx_assign(), but there is currently no mechanism
		 * to do so.
		 */
		(void) dnode_hold_impl(os->os, object, DNODE_MUST_BE_FREE,
		    FTAG, &dn);
		if (dn)
			break;

		if (dmu_object_next(os, &object, B_TRUE, 0) == 0)
			osi->os_obj_next = object - 1;
	}

	dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx);
	dnode_rele(dn, FTAG);

	mutex_exit(&osi->os_obj_lock);

	dmu_tx_add_new_object(tx, os, object);
	return (object);
}
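To make the scan chunk concrete, here is a worked instance using the standard on-disk constants (DNODE_SHIFT = 9 for 512-byte dnodes, DNODE_BLOCK_SHIFT = 14 for 16K dnode blocks, SPA_BLKPTRSHIFT = 7 for 128-byte block pointers) and an assumed metadnode indirect block shift of 14 (16K); the indirect block size is an assumption, not something the example fixes:

	/* dnodes per L0 block:  DNODES_PER_BLOCK = 1 << (14 - 9) = 32  */
	/* blkptrs per indirect block:  1 << (14 - 7) = 128            */
	/* per-chunk count:  L2_dnode_count = 32 << (14 - 7) = 4096    */

So under these assumptions, each pass through the P2PHASE() check covers 4096 candidate object numbers before the code looks for a fresh sparse region.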
Example #4
uint64_t
dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize,
    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
{
	uint64_t object;
	uint64_t L1_dnode_count = DNODES_PER_BLOCK <<
	    (DMU_META_DNODE(os)->dn_indblkshift - SPA_BLKPTRSHIFT);
	dnode_t *dn = NULL;
	int dn_slots = dnodesize >> DNODE_SHIFT;
	boolean_t restarted = B_FALSE;

	if (dn_slots == 0) {
		dn_slots = DNODE_MIN_SLOTS;
	} else {
		ASSERT3S(dn_slots, >=, DNODE_MIN_SLOTS);
		ASSERT3S(dn_slots, <=, DNODE_MAX_SLOTS);
	}

	mutex_enter(&os->os_obj_lock);
	for (;;) {
		object = os->os_obj_next;
		/*
		 * Each time we polish off an L1 bp worth of dnodes (2^12
		 * objects), move to another L1 bp that's still
		 * reasonably sparse (at most 1/4 full). Look from the
		 * beginning at most once per txg. If we still can't
		 * allocate from that L1 block, search for an empty L0
		 * block, which will quickly skip to the end of the
		 * metadnode if no nearby L0 blocks are empty. This
		 * fallback avoids a pathology where full dnode blocks
		 * containing large dnodes appear sparse because they
		 * have a low blk_fill, leading to many failed
		 * allocation attempts. In the long term a better
		 * mechanism to search for sparse metadnode regions,
		 * such as spacemaps, could be implemented.
		 *
		 * os_scan_dnodes is set during txg sync if enough objects
		 * have been freed since the previous rescan to justify
		 * backfilling again.
		 *
		 * Note that dmu_traverse depends on the behavior that we use
		 * multiple blocks of the dnode object before going back to
		 * reuse objects.  Any change to this algorithm should preserve
		 * that property or find another solution to the issues
		 * described in traverse_visitbp.
		 */
		if (P2PHASE(object, L1_dnode_count) == 0) {
			uint64_t offset;
			uint64_t blkfill;
			int minlvl;
			int error;
			if (os->os_rescan_dnodes) {
				offset = 0;
				os->os_rescan_dnodes = B_FALSE;
			} else {
				offset = object << DNODE_SHIFT;
			}
			blkfill = restarted ? 1 : DNODES_PER_BLOCK >> 2;
			minlvl = restarted ? 1 : 2;
			restarted = B_TRUE;
			error = dnode_next_offset(DMU_META_DNODE(os),
			    DNODE_FIND_HOLE, &offset, minlvl, blkfill, 0);
			if (error == 0)
				object = offset >> DNODE_SHIFT;
		}
		os->os_obj_next = object + dn_slots;

		/*
		 * XXX We should check for an i/o error here and return
		 * up to our caller.  Actually we should pre-read it in
		 * dmu_tx_assign(), but there is currently no mechanism
		 * to do so.
		 */
		(void) dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, dn_slots,
		    FTAG, &dn);
		if (dn)
			break;

		if (dmu_object_next(os, &object, B_TRUE, 0) == 0)
			os->os_obj_next = object;
		else
			/*
			 * Skip to next known valid starting point for a dnode.
			 */
			os->os_obj_next = P2ROUNDUP(object + 1,
			    DNODES_PER_BLOCK);
	}

	dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, dn_slots, tx);
	dnode_rele(dn, FTAG);

	mutex_exit(&os->os_obj_lock);

	dmu_tx_add_new_object(tx, os, object);
	return (object);
}
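The P2PHASE() and P2ROUNDUP() helpers used throughout these examples come from the illumos/ZFS sys/sysmacros.h power-of-2 family; a minimal sketch of their semantics, valid only when align is a power of two:

	/* Offset of x within its align-sized chunk; 0 means aligned. */
	#define	P2PHASE(x, align)	((x) & ((align) - 1))

	/* Round x up to the next multiple of align. */
	#define	P2ROUNDUP(x, align)	(-(-(x) & -(align)))

Hence P2PHASE(object, L1_dnode_count) == 0 fires exactly once per chunk of L1_dnode_count objects, and P2ROUNDUP(object + 1, DNODES_PER_BLOCK) skips ahead to the first slot of the next dnode block.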