示例#1
0
文件: dnode_sync.c 项目: 64116278/zfs
static void
free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks,
    dmu_tx_t *tx)
{
	dnode_t *dn;
	blkptr_t *bp;
	dmu_buf_impl_t *subdb;
	uint64_t start, end, dbstart, dbend, i;
	int epbs, shift;

	/*
	 * There is a small possibility that this block will not be cached:
	 *   1 - if level > 1 and there are no children with level <= 1
	 *   2 - if this block was evicted since we read it from
	 *	 dmu_tx_hold_free().
	 */
	if (db->db_state != DB_CACHED)
		(void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED);

	dbuf_release_bp(db);
	bp = db->db.db_data;

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);
	epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
	shift = (db->db_level - 1) * epbs;
	dbstart = db->db_blkid << epbs;
	start = blkid >> shift;
	if (dbstart < start) {
		bp += start - dbstart;
	} else {
		start = dbstart;
	}
	dbend = ((db->db_blkid + 1) << epbs) - 1;
	end = (blkid + nblks - 1) >> shift;
	if (dbend <= end)
		end = dbend;

	ASSERT3U(start, <=, end);

	if (db->db_level == 1) {
		FREE_VERIFY(db, start, end, tx);
		free_blocks(dn, bp, end-start+1, tx);
	} else {
		for (i = start; i <= end; i++, bp++) {
			if (BP_IS_HOLE(bp))
				continue;
			rw_enter(&dn->dn_struct_rwlock, RW_READER);
			VERIFY0(dbuf_hold_impl(dn, db->db_level - 1,
			    i, TRUE, FALSE, FTAG, &subdb));
			rw_exit(&dn->dn_struct_rwlock);
			ASSERT3P(bp, ==, subdb->db_blkptr);

			free_children(subdb, blkid, nblks, tx);
			dbuf_rele(subdb, FTAG);
		}
	}

	/* If this whole block is free, free ourself too. */
	for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++) {
		if (!BP_IS_HOLE(bp))
			break;
	}
	if (i == 1 << epbs) {
		/* didn't find any non-holes */
		bzero(db->db.db_data, db->db.db_size);
		free_blocks(dn, db->db_blkptr, 1, tx);
	} else {
		/*
		 * Partial block free; must be marked dirty so that it
		 * will be written out.
		 */
		ASSERT(db->db_dirtycnt > 0);
	}

	DB_DNODE_EXIT(db);
	arc_buf_freeze(db->db_buf);
}
示例#2
0
/* ARGSUSED */
static void
dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
{
	dnode_t *dn = txh->txh_dnode;
	uint64_t start, end, i;
	int min_bs, max_bs, min_ibs, max_ibs, epbs, bits;
	int err = 0;

	if (len == 0)
		return;

	min_bs = SPA_MINBLOCKSHIFT;
	max_bs = SPA_MAXBLOCKSHIFT;
	min_ibs = DN_MIN_INDBLKSHIFT;
	max_ibs = DN_MAX_INDBLKSHIFT;

	if (dn) {
		uint64_t history[DN_MAX_LEVELS];
		int nlvls = dn->dn_nlevels;
		int delta;

		/*
		 * For i/o error checking, read the first and last level-0
		 * blocks (if they are not aligned), and all the level-1 blocks.
		 */
		if (dn->dn_maxblkid == 0) {
			delta = dn->dn_datablksz;
			start = (off < dn->dn_datablksz) ? 0 : 1;
			end = (off+len <= dn->dn_datablksz) ? 0 : 1;
			if (start == 0 && (off > 0 || len < dn->dn_datablksz)) {
				err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
				if (err)
					goto out;
				delta -= off;
			}
		} else {
			zio_t *zio = zio_root(dn->dn_objset->os_spa,
			    NULL, NULL, ZIO_FLAG_CANFAIL);

			/* first level-0 block */
			start = off >> dn->dn_datablkshift;
			if (P2PHASE(off, dn->dn_datablksz) ||
			    len < dn->dn_datablksz) {
				err = dmu_tx_check_ioerr(zio, dn, 0, start);
				if (err)
					goto out;
			}

			/* last level-0 block */
			end = (off+len-1) >> dn->dn_datablkshift;
			if (end != start && end <= dn->dn_maxblkid &&
			    P2PHASE(off+len, dn->dn_datablksz)) {
				err = dmu_tx_check_ioerr(zio, dn, 0, end);
				if (err)
					goto out;
			}

			/* level-1 blocks */
			if (nlvls > 1) {
				int shft = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
				for (i = (start>>shft)+1; i < end>>shft; i++) {
					err = dmu_tx_check_ioerr(zio, dn, 1, i);
					if (err)
						goto out;
				}
			}

			err = zio_wait(zio);
			if (err)
				goto out;
			delta = P2NPHASE(off, dn->dn_datablksz);
		}

		min_ibs = max_ibs = dn->dn_indblkshift;
		if (dn->dn_maxblkid > 0) {
			/*
			 * The blocksize can't change,
			 * so we can make a more precise estimate.
			 */
			ASSERT(dn->dn_datablkshift != 0);
			min_bs = max_bs = dn->dn_datablkshift;
		}

		/*
		 * If this write is not off the end of the file
		 * we need to account for overwrites/unref.
		 */
		if (start <= dn->dn_maxblkid) {
			for (int l = 0; l < DN_MAX_LEVELS; l++)
				history[l] = -1ULL;
		}
		while (start <= dn->dn_maxblkid) {
			dmu_buf_impl_t *db;

			rw_enter(&dn->dn_struct_rwlock, RW_READER);
			err = dbuf_hold_impl(dn, 0, start, FALSE, FTAG, &db);
			rw_exit(&dn->dn_struct_rwlock);

			if (err) {
				txh->txh_tx->tx_err = err;
				return;
			}

			dmu_tx_count_twig(txh, dn, db, 0, start, B_FALSE,
			    history);
			dbuf_rele(db, FTAG);
			if (++start > end) {
				/*
				 * Account for new indirects appearing
				 * before this IO gets assigned into a txg.
				 */
				bits = 64 - min_bs;
				epbs = min_ibs - SPA_BLKPTRSHIFT;
				for (bits -= epbs * (nlvls - 1);
				    bits >= 0; bits -= epbs)
					txh->txh_fudge += 1ULL << max_ibs;
				goto out;
			}
			off += delta;
			if (len >= delta)
				len -= delta;
			delta = dn->dn_datablksz;
		}
	}
示例#3
0
文件: dnode_sync.c 项目: 64116278/zfs
static void
free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx)
{
	int off, num;
	int i, err, epbs;
	uint64_t txg = tx->tx_txg;
	dnode_t *dn;

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);
	epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
	off = start - (db->db_blkid * 1<<epbs);
	num = end - start + 1;

	ASSERT3U(off, >=, 0);
	ASSERT3U(num, >=, 0);
	ASSERT3U(db->db_level, >, 0);
	ASSERT3U(db->db.db_size, ==, 1 << dn->dn_phys->dn_indblkshift);
	ASSERT3U(off+num, <=, db->db.db_size >> SPA_BLKPTRSHIFT);
	ASSERT(db->db_blkptr != NULL);

	for (i = off; i < off+num; i++) {
		uint64_t *buf;
		dmu_buf_impl_t *child;
		dbuf_dirty_record_t *dr;
		int j;

		ASSERT(db->db_level == 1);

		rw_enter(&dn->dn_struct_rwlock, RW_READER);
		err = dbuf_hold_impl(dn, db->db_level-1,
		    (db->db_blkid << epbs) + i, TRUE, FALSE, FTAG, &child);
		rw_exit(&dn->dn_struct_rwlock);
		if (err == ENOENT)
			continue;
		ASSERT(err == 0);
		ASSERT(child->db_level == 0);
		dr = child->db_last_dirty;
		while (dr && dr->dr_txg > txg)
			dr = dr->dr_next;
		ASSERT(dr == NULL || dr->dr_txg == txg);

		/* data_old better be zeroed */
		if (dr) {
			buf = dr->dt.dl.dr_data->b_data;
			for (j = 0; j < child->db.db_size >> 3; j++) {
				if (buf[j] != 0) {
					panic("freed data not zero: "
					    "child=%p i=%d off=%d num=%d\n",
					    (void *)child, i, off, num);
				}
			}
		}

		/*
		 * db_data better be zeroed unless it's dirty in a
		 * future txg.
		 */
		mutex_enter(&child->db_mtx);
		buf = child->db.db_data;
		if (buf != NULL && child->db_state != DB_FILL &&
		    child->db_last_dirty == NULL) {
			for (j = 0; j < child->db.db_size >> 3; j++) {
				if (buf[j] != 0) {
					panic("freed data not zero: "
					    "child=%p i=%d off=%d num=%d\n",
					    (void *)child, i, off, num);
				}
			}
		}
		mutex_exit(&child->db_mtx);

		dbuf_rele(child, FTAG);
	}
	DB_DNODE_EXIT(db);
}