Exemplo n.º 1
0
/*
 * fscrypt_zeroout_range() - write encrypted zeroes over a run of blocks
 * @inode: inode whose blocks are being zeroed
 * @lblk:  first logical block; handed to the encryption step and advanced
 *         by one for every block written
 * @pblk:  first physical block; converted to a 512-byte sector for each bio
 * @len:   number of blocks to write
 *
 * For every block, ZERO_PAGE(0) is encrypted into a single bounce page and
 * that page is written synchronously to the inode's block device.  Only a
 * filesystem block size equal to PAGE_SIZE is supported (BUG_ON otherwise).
 *
 * Return: 0 on success, otherwise the error of the first step that failed.
 */
int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
				sector_t pblk, unsigned int len)
{
	struct super_block *sb = inode->i_sb;
	struct fscrypt_ctx *ctx;
	struct page *bounce;
	int err = 0;

	BUG_ON(sb->s_blocksize != PAGE_SIZE);

	ctx = fscrypt_get_ctx(inode, GFP_NOFS);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	bounce = fscrypt_alloc_bounce_page(ctx, GFP_NOWAIT);
	if (IS_ERR(bounce)) {
		err = PTR_ERR(bounce);
		goto release;
	}

	/* One encrypt + one synchronous single-page write per block. */
	for (; len; len--, lblk++, pblk++) {
		struct bio *bio;
		int added;

		err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk,
					     ZERO_PAGE(0), bounce,
					     PAGE_SIZE, 0, GFP_NOFS);
		if (err)
			break;

		bio = bio_alloc(GFP_NOWAIT, 1);
		if (!bio) {
			err = -ENOMEM;
			break;
		}
		bio_set_dev(bio, sb->s_bdev);
		/* filesystem block number -> 512-byte sector number */
		bio->bi_iter.bi_sector = pblk << (sb->s_blocksize_bits - 9);
		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

		added = bio_add_page(bio, bounce, sb->s_blocksize, 0);
		if (added != sb->s_blocksize) {
			/* should never happen! */
			WARN_ON(1);
			bio_put(bio);
			err = -EIO;
			break;
		}

		err = submit_bio_wait(bio);
		if (!err && bio->bi_status)
			err = -EIO;
		bio_put(bio);
		if (err)
			break;
	}

release:
	fscrypt_release_ctx(ctx);
	return err;
}
Exemplo n.º 2
0
/*
 * Finish setting up @bio as an @op/@op_flags request that completes through
 * mpage_end_io, pass it through guard_bio_eod(), and submit it.
 *
 * Always returns NULL, so callers can write "bio = mpage_bio_submit(...)"
 * to drop their reference to the submitted bio.
 */
static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio)
{
    bio_set_op_attrs(bio, op, op_flags);
    bio->bi_end_io = mpage_end_io;

    guard_bio_eod(op, bio);
    submit_bio(bio);

    return NULL;
}
Exemplo n.º 3
0
/*
 * pblk_submit_rec() - work handler that resubmits sectors of a failed write
 * @work: the ws_rec member embedded in a struct pblk_rec_ctx
 *
 * The number of sectors to recover is the population count of
 * rqd->ppa_status.  A write bio of that size is filled from the write
 * buffer, the request is set up for recovery and then submitted.  On
 * successful submission the recovery context goes back to pblk->rec_pool;
 * on failure the bio and the request are released instead.
 *
 * NOTE(review): neither the bio-allocation failure path nor the "fail" path
 * returns the recovery context to its mempool - confirm whether it is
 * released elsewhere.
 */
void pblk_submit_rec(struct work_struct *work)
{
	struct pblk_rec_ctx *rec =
			container_of(work, struct pblk_rec_ctx, ws_rec);
	struct pblk *pblk = rec->pblk;
	struct nvm_rq *rqd = rec->rqd;
	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
	int max_secs = nvm_max_phys_sects(pblk->dev);
	unsigned int nr_secs;
	unsigned int nr_read;
	struct bio *bio;
	int err;

	/* Every bit set in ppa_status is one sector that must be rewritten. */
	nr_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status,
				max_secs);

	bio = bio_alloc(GFP_KERNEL, nr_secs);
	if (!bio) {
		pr_err("pblk: not able to create recovery bio\n");
		return;
	}

	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
	bio->bi_iter.bi_sector = 0;

	rqd->nr_ppas = nr_secs;
	rqd->bio = bio;

	nr_read = pblk_rb_read_to_bio_list(&pblk->rwb, bio, &rec->failed,
					   nr_secs);
	if (nr_read != nr_secs) {
		pr_err("pblk: could not read recovery entries\n");
		goto fail;
	}

	if (pblk_setup_w_rec_rq(pblk, rqd, c_ctx)) {
		pr_err("pblk: could not setup recovery request\n");
		goto fail;
	}

#ifdef CONFIG_NVM_DEBUG
	atomic_long_add(nr_secs, &pblk->recov_writes);
#endif

	err = pblk_submit_io(pblk, rqd);
	if (err) {
		pr_err("pblk: I/O submission failed: %d\n", err);
		goto fail;
	}

	mempool_free(rec, pblk->rec_pool);
	return;

fail:
	bio_put(bio);
	pblk_free_rqd(pblk, rqd, WRITE);
}
Exemplo n.º 4
0
/**
 * blkdev_reset_zones - Reset zones write pointer
 * @bdev:	Target block device
 * @sector:	Start sector of the first zone to reset
 * @nr_sectors:	Number of sectors, at least the length of one zone
 * @gfp_mask:	Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Reset the write pointer of the zones contained in the range
 *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
 *    is valid, but the specified range should not contain conventional zones.
 *
 *    One zero-payload REQ_OP_ZONE_RESET bio is built per zone; the bios are
 *    linked through blk_next_bio() under a plug and only the last one is
 *    waited on.
 *
 * Return: -EOPNOTSUPP if the queue is not zoned, -EPERM if the device is
 * read-only, -EINVAL for an empty, out-of-range or misaligned range,
 * otherwise the result of submit_bio_wait() on the final bio.
 */
int blkdev_reset_zones(struct block_device *bdev,
		       sector_t sector, sector_t nr_sectors,
		       gfp_t gfp_mask)
{
	struct request_queue *q = bdev_get_queue(bdev);
	const sector_t end_sector = sector + nr_sectors;
	sector_t zone_sectors, zone_mask;
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret;

	if (!blk_queue_is_zoned(q))
		return -EOPNOTSUPP;

	if (bdev_read_only(bdev))
		return -EPERM;

	/* Out of range */
	if (!nr_sectors || end_sector > bdev->bd_part->nr_sects)
		return -EINVAL;

	/* Check alignment (handle eventual smaller last zone) */
	zone_sectors = blk_queue_zone_sectors(q);
	zone_mask = zone_sectors - 1;

	if (sector & zone_mask)
		return -EINVAL;

	if ((nr_sectors & zone_mask) &&
	    end_sector != bdev->bd_part->nr_sects)
		return -EINVAL;

	blk_start_plug(&plug);
	for (; sector < end_sector; sector += zone_sectors) {
		bio = blk_next_bio(bio, 0, gfp_mask);
		bio_set_dev(bio, bdev);
		bio->bi_iter.bi_sector = sector;
		bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);

		/* This may take a while, so be nice to others */
		cond_resched();
	}

	ret = submit_bio_wait(bio);
	bio_put(bio);

	blk_finish_plug(&plug);

	return ret;
}
Exemplo n.º 5
0
/**
 * blkdev_reset_zones - Reset zones write pointer
 * @bdev:	Target block device
 * @sector:	Start sector of the first zone to reset
 * @nr_sectors:	Number of sectors, at least the length of one zone
 * @gfp_mask:	Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Reset the write pointer of the zones contained in the range
 *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
 *    is valid, but the specified range should not contain conventional zones.
 *
 *    Zones are reset one at a time: a zero-payload REQ_OP_ZONE_RESET bio is
 *    allocated, submitted and waited for per zone, and the first failure
 *    aborts the loop (zones already reset stay reset).
 *
 * Return: 0 on success, -ENXIO if the device has no request queue,
 * -EOPNOTSUPP if the queue is not zoned, -EINVAL for an out-of-range or
 * misaligned range, -ENOMEM if a bio cannot be allocated, or the error
 * reported by submit_bio_wait().
 */
int blkdev_reset_zones(struct block_device *bdev,
		       sector_t sector, sector_t nr_sectors,
		       gfp_t gfp_mask)
{
	struct request_queue *q = bdev_get_queue(bdev);
	sector_t zone_sectors;
	sector_t end_sector = sector + nr_sectors;
	struct bio *bio;
	int ret;

	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -EOPNOTSUPP;

	if (end_sector > bdev->bd_part->nr_sects)
		/* Out of range */
		return -EINVAL;

	/* Check alignment (handle eventual smaller last zone) */
	zone_sectors = blk_queue_zone_size(q);
	if (sector & (zone_sectors - 1))
		return -EINVAL;

	if ((nr_sectors & (zone_sectors - 1)) &&
	    end_sector != bdev->bd_part->nr_sects)
		return -EINVAL;

	while (sector < end_sector) {

		/*
		 * @gfp_mask comes from the caller and may not allow blocking,
		 * in which case bio_alloc() can return NULL.
		 */
		bio = bio_alloc(gfp_mask, 0);
		if (!bio)
			return -ENOMEM;

		bio->bi_iter.bi_sector = sector;
		bio->bi_bdev = bdev;
		bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);

		ret = submit_bio_wait(bio);
		bio_put(bio);

		if (ret)
			return ret;

		sector += zone_sectors;

		/* This may take a while, so be nice to others */
		cond_resched();

	}

	return 0;
}
Exemplo n.º 6
0
/*
 * write_metadata() - queue one log-device sector holding an entry header
 * @lc:       log-writes context; supplies the log device and sector size
 * @entry:    header bytes copied to the start of the sector
 * @entrylen: length of @entry
 * @data:     optional payload copied right after the header
 * @datalen:  length of @data (0 means no payload is copied)
 * @sector:   sector on the log device to write to
 *
 * A fresh page is filled with @entry, then @data, then zeroes up to
 * lc->sectorsize, and is submitted asynchronously as a write bio that
 * completes through log_end_io.
 *
 * Return: 0 once the bio has been submitted; -1 on failure, in which case
 * put_io_block() has been called on @lc.
 */
static int write_metadata(struct log_writes_c *lc, void *entry,
			  size_t entrylen, void *data, size_t datalen,
			  sector_t sector)
{
	const size_t used = entrylen + datalen;
	struct page *page;
	struct bio *bio;
	size_t added;
	void *kaddr;

	bio = bio_alloc(GFP_KERNEL, 1);
	if (!bio) {
		DMERR("Couldn't alloc log bio");
		goto error;
	}
	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
	bio->bi_end_io = log_end_io;
	bio->bi_private = lc;
	bio->bi_bdev = lc->logdev->bdev;
	bio->bi_iter.bi_sector = sector;
	bio->bi_iter.bi_size = 0;

	page = alloc_page(GFP_KERNEL);
	if (!page) {
		DMERR("Couldn't alloc log page");
		bio_put(bio);
		goto error;
	}

	/* Sector layout: [entry][data][zero padding up to sectorsize]. */
	kaddr = kmap_atomic(page);
	memcpy(kaddr, entry, entrylen);
	if (datalen)
		memcpy(kaddr + entrylen, data, datalen);
	memset(kaddr + used, 0, lc->sectorsize - used);
	kunmap_atomic(kaddr);

	added = bio_add_page(bio, page, lc->sectorsize, 0);
	if (added == lc->sectorsize) {
		submit_bio(bio);
		return 0;
	}

	DMERR("Couldn't add page to the log block");
	bio_put(bio);
	__free_page(page);
error:
	put_io_block(lc);
	return -1;
}
Exemplo n.º 7
0
static int sync_request(struct page *page, struct block_device *bdev, int op)
{
	struct bio bio;
	struct bio_vec bio_vec;

	bio_init(&bio);
	bio.bi_max_vecs = 1;
	bio.bi_io_vec = &bio_vec;
	bio_vec.bv_page = page;
	bio_vec.bv_len = PAGE_SIZE;
	bio_vec.bv_offset = 0;
	bio.bi_vcnt = 1;
	bio.bi_bdev = bdev;
	bio.bi_iter.bi_sector = page->index * (PAGE_SIZE >> 9);
	bio.bi_iter.bi_size = PAGE_SIZE;
	bio_set_op_attrs(&bio, op, 0);

	return submit_bio_wait(&bio);
}
Exemplo n.º 8
0
/*
 * do_add_page_to_bio() - NOTE: this excerpt ends before the function does;
 * only the part visible here is described.
 *
 * The visible part translates the file sector @isect into a byte address on
 * the device (using the extent's be_v_offset/be_f_offset), refreshes @map
 * through dev->map() when that address lies outside the currently cached
 * mapping (submitting any bio built so far via bl_submit_bio()), clamps
 * *@len to the end of the mapping, and allocates a new bio for @rw when
 * none is in progress.
 *
 * Returns ERR_PTR(-EIO) if dev->map() fails and ERR_PTR(-ENOMEM) if the bio
 * cannot be allocated; the remaining return paths are outside this excerpt.
 */
static struct bio *
do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
		struct page *page, struct pnfs_block_dev_map *map,
		struct pnfs_block_extent *be, bio_end_io_t end_io,
		struct parallel_io *par, unsigned int offset, int *len)
{
	struct pnfs_block_dev *dev =
		container_of(be->be_device, struct pnfs_block_dev, node);
	u64 disk_addr, end;

	dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__,
		npg, rw, (unsigned long long)isect, offset, *len);

	/* translate to device offset */
	isect += be->be_v_offset;
	isect -= be->be_f_offset;

	/* translate to physical disk offset */
	disk_addr = (u64)isect << SECTOR_SHIFT;
	/* address outside the cached mapping: look up a new one first */
	if (disk_addr < map->start || disk_addr >= map->start + map->len) {
		if (!dev->map(dev, disk_addr, map))
			return ERR_PTR(-EIO);
		/* submit what was built so far before switching mappings */
		bio = bl_submit_bio(bio);
	}
	disk_addr += map->disk_offset;
	disk_addr -= map->start;

	/* limit length to what the device mapping allows */
	end = disk_addr + *len;
	if (end >= map->start + map->len)
		*len = map->start + map->len - disk_addr;

retry:
	/* no bio in progress: start one at the translated sector */
	if (!bio) {
		bio = bl_alloc_init_bio(npg, map->bdev,
				disk_addr >> SECTOR_SHIFT, end_io, par);
		if (!bio)
			return ERR_PTR(-ENOMEM);
		bio_set_op_attrs(bio, rw, 0);
	}
Exemplo n.º 9
0
/**
 * blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage
 * @q:		request queue where request should be inserted
 * @rq:		request to fill
 * @kbuf:	the kernel buffer
 * @len:	length of user data
 * @gfp_mask:	memory allocation flags
 *
 * Description:
 *    Data will be mapped directly if possible. Otherwise a bounce
 *    buffer is used. Can be called multiple times to append multiple
 *    buffers.
 *
 *    The copy path is taken when the buffer fails blk_rq_aligned() or
 *    lives on the stack; in that case REQ_COPY_USER is set on @rq.
 *
 * Return: 0 on success, -EINVAL for a NULL/empty buffer or a length above
 * the queue's max_hw_sectors limit, or the error from building or
 * appending the bio.
 */
int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
		    unsigned int len, gfp_t gfp_mask)
{
	const int reading = rq_data_dir(rq) == READ;
	const unsigned long addr = (unsigned long) kbuf;
	struct bio *bio;
	int bounce;
	int err;

	if (len > (queue_max_hw_sectors(q) << 9) || !len || !kbuf)
		return -EINVAL;

	bounce = !blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf);

	bio = bounce ? bio_copy_kern(q, kbuf, len, gfp_mask, reading)
		     : bio_map_kern(q, kbuf, len, gfp_mask);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	if (!reading)
		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

	if (bounce)
		rq->cmd_flags |= REQ_COPY_USER;

	err = blk_rq_append_bio(q, rq, bio);
	if (unlikely(err)) {
		/* request is too big */
		bio_put(bio);
		return err;
	}

	blk_queue_bounce(q, &rq->bio);
	return 0;
}
Exemplo n.º 10
0
/*
 * gfs2_read_super() - read the on-disk superblock into @sdp
 * @sdp:    in-core superblock to fill
 * @sector: superblock location, in units of the filesystem block size
 * @silent: passed through to gfs2_check_sb()
 *
 * Reads one page from the device with a REQ_META read bio and waits for the
 * page lock to be dropped (the bio completes through end_bio_io_page).  An
 * up-to-date page is decoded with gfs2_sb_in() and then checked.
 *
 * Return: -ENOMEM if no page could be allocated, -EIO if the page did not
 * become up to date, otherwise the result of gfs2_check_sb().
 */
static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
{
	struct super_block *sb = sdp->sd_vfs;
	struct gfs2_sb *raw;
	struct page *page;
	struct bio *bio;
	int err = 0;

	page = alloc_page(GFP_NOFS);
	if (unlikely(!page))
		return -ENOMEM;

	ClearPageUptodate(page);
	ClearPageDirty(page);
	lock_page(page);

	bio = bio_alloc(GFP_NOFS, 1);
	bio_set_dev(bio, sb->s_bdev);
	bio->bi_iter.bi_sector = sector * (sb->s_blocksize >> 9);
	bio->bi_end_io = end_bio_io_page;
	bio->bi_private = page;
	bio_add_page(bio, page, PAGE_SIZE, 0);
	bio_set_op_attrs(bio, REQ_OP_READ, REQ_META);

	submit_bio(bio);
	wait_on_page_locked(page);
	bio_put(bio);

	if (PageUptodate(page)) {
		raw = kmap(page);
		gfs2_sb_in(sdp, raw);
		kunmap(page);
	} else {
		err = -EIO;
	}
	__free_page(page);

	if (err)
		return err;
	return gfs2_check_sb(sdp, silent);
}
Exemplo n.º 11
0
/*
 * ext4_mpage_readpages() - read one or more pages, batching them into bios
 * @mapping:      address space the pages belong to
 * @pages:        list of pages to read, or NULL when only @page is read
 * @page:         the page to read when @pages is NULL
 * @nr_pages:     number of pages to process
 * @is_readahead: when true the read bios are flagged REQ_RAHEAD
 *
 * For every page the function works out the on-disk block of each block in
 * the page (reusing the previous ext4_map_blocks() result where it still
 * covers the page), zero-fills from the first hole to the end of the page,
 * and appends the page to the current bio.  A bio is submitted whenever the
 * next page is not physically contiguous with it, the bio is full, a
 * mapping boundary is hit, or the page contains a hole.  Pages that cannot
 * be handled this way (buffers attached, a hole followed by mapped blocks,
 * discontiguous blocks) go through block_read_full_page() at "confused".
 *
 * Always returns 0; per-page failures are reported by SetPageError() on the
 * affected page.
 */
int ext4_mpage_readpages(struct address_space *mapping,
			 struct list_head *pages, struct page *page,
			 unsigned nr_pages, bool is_readahead)
{
	struct bio *bio = NULL;
	sector_t last_block_in_bio = 0;

	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
	const unsigned blocksize = 1 << blkbits;
	sector_t block_in_file;
	sector_t last_block;
	sector_t last_block_in_file;
	sector_t blocks[MAX_BUF_PER_PAGE];
	unsigned page_block;
	struct block_device *bdev = inode->i_sb->s_bdev;
	int length;
	unsigned relative_block = 0;
	struct ext4_map_blocks map;

	/* Start with an empty mapping; it is carried over between pages. */
	map.m_pblk = 0;
	map.m_lblk = 0;
	map.m_len = 0;
	map.m_flags = 0;

	for (; nr_pages; nr_pages--) {
		int fully_mapped = 1;
		/* blocks_per_page means "no hole found in this page" */
		unsigned first_hole = blocks_per_page;

		prefetchw(&page->flags);
		if (pages) {
			/* list mode: take the next page and add it to the page cache */
			page = lru_to_page(pages);
			list_del(&page->lru);
			if (add_to_page_cache_lru(page, mapping, page->index,
				  readahead_gfp_mask(mapping)))
				goto next_page;
		}

		if (page_has_buffers(page))
			goto confused;

		block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
		last_block = block_in_file + nr_pages * blocks_per_page;
		/* never map past the block that contains i_size */
		last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
		if (last_block > last_block_in_file)
			last_block = last_block_in_file;
		page_block = 0;

		/*
		 * Map blocks using the previous result first.
		 */
		if ((map.m_flags & EXT4_MAP_MAPPED) &&
		    block_in_file > map.m_lblk &&
		    block_in_file < (map.m_lblk + map.m_len)) {
			unsigned map_offset = block_in_file - map.m_lblk;
			unsigned last = map.m_len - map_offset;

			for (relative_block = 0; ; relative_block++) {
				if (relative_block == last) {
					/* needed? */
					map.m_flags &= ~EXT4_MAP_MAPPED;
					break;
				}
				if (page_block == blocks_per_page)
					break;
				blocks[page_block] = map.m_pblk + map_offset +
					relative_block;
				page_block++;
				block_in_file++;
			}
		}

		/*
		 * Then do more ext4_map_blocks() calls until we are
		 * done with this page.
		 */
		while (page_block < blocks_per_page) {
			if (block_in_file < last_block) {
				map.m_lblk = block_in_file;
				map.m_len = last_block - block_in_file;

				if (ext4_map_blocks(NULL, inode, &map, 0) < 0) {
				/* also the target of the bio setup failures below */
				set_error_page:
					SetPageError(page);
					zero_user_segment(page, 0,
							  PAGE_SIZE);
					unlock_page(page);
					goto next_page;
				}
			}
			if ((map.m_flags & EXT4_MAP_MAPPED) == 0) {
				/* unmapped block: remember where the first hole starts */
				fully_mapped = 0;
				if (first_hole == blocks_per_page)
					first_hole = page_block;
				page_block++;
				block_in_file++;
				continue;
			}
			if (first_hole != blocks_per_page)
				goto confused;		/* hole -> non-hole */

			/* Contiguous blocks? */
			if (page_block && blocks[page_block-1] != map.m_pblk-1)
				goto confused;
			for (relative_block = 0; ; relative_block++) {
				if (relative_block == map.m_len) {
					/* needed? */
					map.m_flags &= ~EXT4_MAP_MAPPED;
					break;
				} else if (page_block == blocks_per_page)
					break;
				blocks[page_block] = map.m_pblk+relative_block;
				page_block++;
				block_in_file++;
			}
		}
		if (first_hole != blocks_per_page) {
			/* zero everything from the first hole to the end of the page */
			zero_user_segment(page, first_hole << blkbits,
					  PAGE_SIZE);
			if (first_hole == 0) {
				/* the whole page is a hole: nothing to read */
				SetPageUptodate(page);
				unlock_page(page);
				goto next_page;
			}
		} else if (fully_mapped) {
			SetPageMappedToDisk(page);
		}
		/* cleancache_get_page() returning 0 is treated as "page now filled" */
		if (fully_mapped && blocks_per_page == 1 &&
		    !PageUptodate(page) && cleancache_get_page(page) == 0) {
			SetPageUptodate(page);
			goto confused;
		}

		/*
		 * This page will go to BIO.  Do we need to send this
		 * BIO off first?
		 */
		if (bio && (last_block_in_bio != blocks[0] - 1)) {
		/* also entered from below when bio_add_page() cannot fit the page */
		submit_and_realloc:
			submit_bio(bio);
			bio = NULL;
		}
		if (bio == NULL) {
			struct fscrypt_ctx *ctx = NULL;

			/* encrypted regular files get a context, stored in bi_private below */
			if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) {
				ctx = fscrypt_get_ctx(inode, GFP_NOFS);
				if (IS_ERR(ctx))
					goto set_error_page;
			}
			bio = bio_alloc(GFP_KERNEL,
				min_t(int, nr_pages, BIO_MAX_PAGES));
			if (!bio) {
				if (ctx)
					fscrypt_release_ctx(ctx);
				goto set_error_page;
			}
			bio_set_dev(bio, bdev);
			bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
			bio->bi_end_io = mpage_end_io;
			bio->bi_private = ctx;
			bio_set_op_attrs(bio, REQ_OP_READ,
						is_readahead ? REQ_RAHEAD : 0);
		}

		/* only the part of the page before the first hole is read */
		length = first_hole << blkbits;
		if (bio_add_page(bio, page, length, 0) < length)
			goto submit_and_realloc;

		if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
		     (relative_block == map.m_len)) ||
		    (first_hole != blocks_per_page)) {
			submit_bio(bio);
			bio = NULL;
		} else
			last_block_in_bio = blocks[blocks_per_page - 1];
		goto next_page;
	confused:
		/* fallback: flush the pending bio and read this page via buffers */
		if (bio) {
			submit_bio(bio);
			bio = NULL;
		}
		if (!PageUptodate(page))
			block_read_full_page(page, ext4_get_block);
		else
			unlock_page(page);
	next_page:
		/* list mode: drop a reference on the page taken from @pages */
		if (pages)
			put_page(page);
	}
	BUG_ON(pages && !list_empty(pages));
	/* submit whatever is still pending */
	if (bio)
		submit_bio(bio);
	return 0;
}
Exemplo n.º 12
0
/*
 * log_one_block() - write one pending block to the log device
 * @lc:     log-writes context
 * @block:  pending block; consumed by this function on every path
 * @sector: log-device sector where the entry header goes
 *
 * The entry header (plus block->data) is written through write_metadata().
 * If the block carries data pages they are written starting at the next
 * sector, using as many bios as needed; lc->io_blocks is incremented once
 * per bio submitted.
 *
 * Return: 0 on success, -1 on failure.
 */
static int log_one_block(struct log_writes_c *lc,
			 struct pending_block *block, sector_t sector)
{
	struct log_write_entry entry;
	struct bio *bio;
	size_t added;
	int idx;

	entry.sector = cpu_to_le64(block->sector);
	entry.nr_sectors = cpu_to_le64(block->nr_sectors);
	entry.flags = cpu_to_le64(block->flags);
	entry.data_len = cpu_to_le64(block->datalen);

	if (write_metadata(lc, &entry, sizeof(entry), block->data,
			   block->datalen, sector)) {
		free_pending_block(lc, block);
		return -1;
	}

	if (block->vec_cnt) {
		/* Data pages start in the sector after the header. */
		sector++;

		bio = bio_alloc(GFP_KERNEL, block->vec_cnt);
		if (!bio) {
			DMERR("Couldn't alloc log bio");
			goto error;
		}
		atomic_inc(&lc->io_blocks);
		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
		bio->bi_end_io = log_end_io;
		bio->bi_private = lc;
		bio->bi_bdev = lc->logdev->bdev;
		bio->bi_iter.bi_sector = sector;
		bio->bi_iter.bi_size = 0;

		for (idx = 0; idx < block->vec_cnt; idx++) {
			/*
			 * The page offset is always 0 because we allocate a
			 * new page for every bvec in the original bio for
			 * simplicity sake.
			 */
			added = bio_add_page(bio, block->vecs[idx].bv_page,
					     block->vecs[idx].bv_len, 0);
			if (added != block->vecs[idx].bv_len) {
				/*
				 * Page did not fit: send the current bio and
				 * continue in a new one sized for the rest.
				 */
				atomic_inc(&lc->io_blocks);
				submit_bio(bio);

				bio = bio_alloc(GFP_KERNEL,
						block->vec_cnt - idx);
				if (!bio) {
					DMERR("Couldn't alloc log bio");
					goto error;
				}
				bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
				bio->bi_end_io = log_end_io;
				bio->bi_private = lc;
				bio->bi_bdev = lc->logdev->bdev;
				bio->bi_iter.bi_sector = sector;
				bio->bi_iter.bi_size = 0;

				added = bio_add_page(bio,
						     block->vecs[idx].bv_page,
						     block->vecs[idx].bv_len,
						     0);
				if (added != block->vecs[idx].bv_len) {
					DMERR("Couldn't add page on new bio?");
					bio_put(bio);
					goto error;
				}
			}
			sector += block->vecs[idx].bv_len >> SECTOR_SHIFT;
		}
		submit_bio(bio);
	}

	kfree(block->data);
	kfree(block);
	put_pending_block(lc);
	return 0;

error:
	free_pending_block(lc, block);
	put_io_block(lc);
	return -1;
}
Exemplo n.º 13
0
/**
 * blkdev_report_zones - Get zones information
 * @bdev:	Target block device
 * @sector:	Sector from which to report zones
 * @zones:	Array of zone structures where to return the zones information
 * @nr_zones:	Number of zone structures in the zone array
 * @gfp_mask:	Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Get zone information starting from the zone containing @sector.
 *    The number of zone information reported may be less than the number
 *    requested by @nr_zones. The number of zones actually reported is
 *    returned in @nr_zones.
 *
 * Return: 0 on success (with *@nr_zones updated, except when it was 0 on
 * entry), -ENXIO if the device has no request queue, -EOPNOTSUPP if the
 * queue is not zoned, -ENOMEM if the bio or its pages cannot be allocated,
 * or the error returned by submit_bio_wait().
 */
int blkdev_report_zones(struct block_device *bdev,
			sector_t sector,
			struct blk_zone *zones,
			unsigned int *nr_zones,
			gfp_t gfp_mask)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct blk_zone_report_hdr *hdr;
	unsigned int nrz = *nr_zones;
	struct page *page;
	unsigned int nr_rep;
	size_t rep_bytes;
	unsigned int nr_pages;
	struct bio *bio;
	struct bio_vec *bv;
	unsigned int i, n, nz;
	unsigned int ofst;
	void *addr;
	int ret = 0;

	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -EOPNOTSUPP;

	/* Nothing requested: leave *nr_zones (already 0) untouched. */
	if (!nrz)
		return 0;

	/* Start sector beyond the device: report zero zones. */
	if (sector > bdev->bd_part->nr_sects) {
		*nr_zones = 0;
		return 0;
	}

	/*
	 * The zone report has a header. So make room for it in the
	 * payload. Also make sure that the report fits in a single BIO
	 * that will not be split down the stack.
	 */
	rep_bytes = sizeof(struct blk_zone_report_hdr) +
		sizeof(struct blk_zone) * nrz;
	/* round up to whole pages, then cap at the queue's max_sectors */
	rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK;
	if (rep_bytes > (queue_max_sectors(q) << 9))
		rep_bytes = queue_max_sectors(q) << 9;

	nr_pages = min_t(unsigned int, BIO_MAX_PAGES,
			 rep_bytes >> PAGE_SHIFT);
	nr_pages = min_t(unsigned int, nr_pages,
			 queue_max_segments(q));

	bio = bio_alloc(gfp_mask, nr_pages);
	if (!bio)
		return -ENOMEM;

	bio->bi_bdev = bdev;
	bio->bi_iter.bi_sector = blk_zone_start(q, sector);
	bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);

	/* Back the report with freshly allocated pages. */
	for (i = 0; i < nr_pages; i++) {
		page = alloc_page(gfp_mask);
		if (!page) {
			ret = -ENOMEM;
			goto out;
		}
		/* bio refused the page: go on with the pages added so far */
		if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
			__free_page(page);
			break;
		}
	}

	/* i == 0 here means not a single page made it into the bio */
	if (i == 0)
		ret = -ENOMEM;
	else
		ret = submit_bio_wait(bio);
	if (ret)
		goto out;

	/*
	 * Process the report result: skip the header and go through the
	 * reported zones to fix up the zone information for partitions.
	 * At the same time, return the zone information into the zone
	 * array.
	 *
	 * n counts zone descriptors consumed from the report, nz counts
	 * zones actually stored in @zones, and nr_rep is the zone count
	 * read from the report header.
	 */
	n = 0;
	nz = 0;
	nr_rep = 0;
	bio_for_each_segment_all(bv, bio, i) {

		if (!bv->bv_page)
			break;

		addr = kmap_atomic(bv->bv_page);

		/* Get header in the first page */
		ofst = 0;
		if (!nr_rep) {
			hdr = (struct blk_zone_report_hdr *) addr;
			nr_rep = hdr->nr_zones;
			ofst = sizeof(struct blk_zone_report_hdr);
		}

		/* Fixup and report zones */
		while (ofst < bv->bv_len &&
		       n < nr_rep && nz < nrz) {
			/* a zone is kept only when blkdev_report_zone() returns true */
			if (blkdev_report_zone(bdev, addr + ofst, &zones[nz]))
				nz++;
			ofst += sizeof(struct blk_zone);
			n++;
		}

		kunmap_atomic(addr);

		if (n >= nr_rep || nz >= nrz)
			break;

	}

out:
	/* Free every page attached to the bio, then the bio itself. */
	bio_for_each_segment_all(bv, bio, i)
		__free_page(bv->bv_page);
	bio_put(bio);

	if (ret == 0)
		*nr_zones = nz;

	return ret;
}
Exemplo n.º 14
0
/*
 * rrpc_move_valid_pages -- migrate live data off the block
 * @rrpc: the 'rrpc' structure
 * @rblk: the block from which to migrate live pages
 *
 * Description:
 *   GC algorithms may call this function to migrate remaining live
 *   pages off the block prior to erasing it. This function blocks
 *   further execution until the operation is complete.
 *
 *   Each live page is read into a single page taken from rrpc->page_pool
 *   and then written back through rrpc_submit_io(), reusing one bio for
 *   both transfers.
 *
 * Returns 0 if every page of the block is marked invalid when the function
 * finishes, -ENOMEM if the bio or the page cannot be allocated, and -EIO
 * if live pages remain.
 */
static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
{
	struct nvm_tgt_dev *dev = rrpc->dev;
	struct request_queue *q = dev->q;
	struct rrpc_rev_addr *rev;
	struct nvm_rq *rqd;
	struct bio *bio;
	struct page *page;
	int slot;
	int nr_sec_per_blk = dev->geo.sec_per_blk;
	u64 phys_addr;
	DECLARE_COMPLETION_ONSTACK(wait);

	/* Nothing live in this block: nothing to move. */
	if (bitmap_full(rblk->invalid_pages, nr_sec_per_blk))
		return 0;

	bio = bio_alloc(GFP_NOIO, 1);
	if (!bio) {
		pr_err("nvm: could not alloc bio to gc\n");
		return -ENOMEM;
	}

	page = mempool_alloc(rrpc->page_pool, GFP_NOIO);
	if (!page) {
		bio_put(bio);
		return -ENOMEM;
	}

	/*
	 * Loop while the block still has a page not marked invalid. The loop
	 * body never modifies invalid_pages itself.
	 * NOTE(review): presumably the write path marks the old page invalid;
	 * confirm, since the loop only terminates if that happens.
	 */
	while ((slot = find_first_zero_bit(rblk->invalid_pages,
					    nr_sec_per_blk)) < nr_sec_per_blk) {

		/* Lock laddr */
		phys_addr = rrpc_blk_to_ppa(rrpc, rblk) + slot;

try:
		spin_lock(&rrpc->rev_lock);
		/* Get logical address from physical to logical table */
		rev = &rrpc->rev_trans_map[phys_addr];
		/* already updated by previous regular write */
		if (rev->addr == ADDR_EMPTY) {
			spin_unlock(&rrpc->rev_lock);
			continue;
		}

		/* Could not acquire the laddr: yield and retry this slot. */
		rqd = rrpc_inflight_laddr_acquire(rrpc, rev->addr, 1);
		if (IS_ERR_OR_NULL(rqd)) {
			spin_unlock(&rrpc->rev_lock);
			schedule();
			goto try;
		}

		spin_unlock(&rrpc->rev_lock);

		/* Perform read to do GC */
		bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr);
		bio_set_op_attrs(bio,  REQ_OP_READ, 0);
		bio->bi_private = &wait;
		bio->bi_end_io = rrpc_end_sync_bio;

		/* TODO: may fail when EXP_PG_SIZE > PAGE_SIZE */
		bio_add_pc_page(q, bio, page, RRPC_EXPOSED_PAGE_SIZE, 0);

		if (rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_GC)) {
			pr_err("rrpc: gc read failed.\n");
			rrpc_inflight_laddr_release(rrpc, rqd);
			goto finished;
		}
		wait_for_completion_io(&wait);
		if (bio->bi_error) {
			rrpc_inflight_laddr_release(rrpc, rqd);
			goto finished;
		}

		/* Reuse the same bio and completion for the write-back. */
		bio_reset(bio);
		reinit_completion(&wait);

		bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr);
		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
		bio->bi_private = &wait;
		bio->bi_end_io = rrpc_end_sync_bio;

		bio_add_pc_page(q, bio, page, RRPC_EXPOSED_PAGE_SIZE, 0);

		/* turn the command around and write the data back to a new
		 * address
		 */
		if (rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_GC)) {
			pr_err("rrpc: gc write failed.\n");
			rrpc_inflight_laddr_release(rrpc, rqd);
			goto finished;
		}
		wait_for_completion_io(&wait);

		rrpc_inflight_laddr_release(rrpc, rqd);
		if (bio->bi_error)
			goto finished;

		bio_reset(bio);
	}

finished:
	mempool_free(page, rrpc->page_pool);
	bio_put(bio);

	/* Any page still not marked invalid means the migration is incomplete. */
	if (!bitmap_full(rblk->invalid_pages, nr_sec_per_blk)) {
		pr_err("nvm: failed to garbage collect block\n");
		return -EIO;
	}

	return 0;
}

/*
 * rrpc_block_gc() - work handler that garbage-collects one block
 * @work: the ws_gc member embedded in a struct rrpc_block_gc
 *
 * The work item is returned to rrpc->gcb_pool immediately (everything
 * needed from it is copied into locals first).  The block's live pages are
 * then migrated and the block is erased; on success the block is handed to
 * rrpc_put_blk(), otherwise it is appended to its LUN's prio_list again.
 */
static void rrpc_block_gc(struct work_struct *work)
{
	struct rrpc_block_gc *gcb = container_of(work, struct rrpc_block_gc,
									ws_gc);
	struct rrpc *rrpc = gcb->rrpc;
	struct rrpc_block *rblk = gcb->rblk;
	struct rrpc_lun *rlun = rblk->rlun;
	struct nvm_tgt_dev *dev = rrpc->dev;
	struct ppa_addr ppa;

	mempool_free(gcb, rrpc->gcb_pool);
	pr_debug("nvm: block 'ch:%d,lun:%d,blk:%d' being reclaimed\n",
			rlun->bppa.g.ch, rlun->bppa.g.lun,
			rblk->id);

	if (!rrpc_move_valid_pages(rrpc, rblk)) {
		/* Address the block by its LUN's channel/lun plus its own id. */
		ppa.ppa = 0;
		ppa.g.ch = rlun->bppa.g.ch;
		ppa.g.lun = rlun->bppa.g.lun;
		ppa.g.blk = rblk->id;

		if (!nvm_erase_blk(dev, &ppa, 0)) {
			rrpc_put_blk(rrpc, rblk);
			return;
		}
	}

	/* Migration or erase failed: put the block back on the GC list. */
	spin_lock(&rlun->lock);
	list_add_tail(&rblk->prio, &rlun->prio_list);
	spin_unlock(&rlun->lock);
}

/*
 * Return whichever of @ra and @rb has more invalid pages.  @ra wins a tie,
 * so when folding over a list the earliest block with the maximum count is
 * kept.
 */
static struct rrpc_block *rblk_max_invalid(struct rrpc_block *ra,
							struct rrpc_block *rb)
{
	if (rb->nr_invalid_pages > ra->nr_invalid_pages)
		return rb;

	return ra;
}

/* linearly find the block with highest number of invalid pages
 * requires lun->lock
 */
static struct rrpc_block *block_prio_find_max(struct rrpc_lun *rlun)
{
	struct list_head *prio_list = &rlun->prio_list;
	struct rrpc_block *rblk, *max;

	BUG_ON(list_empty(prio_list));

	max = list_first_entry(prio_list, struct rrpc_block, prio);
	list_for_each_entry(rblk, prio_list, prio)
		max = rblk_max_invalid(max, rblk);

	return max;
}

/*
 * rrpc_lun_gc() - work handler that selects blocks of one LUN for GC
 * @work: the ws_gc member embedded in a struct rrpc_lun
 *
 * The target number of free blocks is blks_per_lun / GC_LIMIT_INVERSE, but
 * never less than rrpc->nr_luns.  While the LUN has fewer free blocks than
 * the target, the block with the most invalid pages is taken off prio_list
 * and queued on rrpc->kgc_wq for rrpc_block_gc().  Selection stops early
 * when the list is empty, the best candidate has no invalid pages, or no
 * work item can be allocated.
 */
static void rrpc_lun_gc(struct work_struct *work)
{
	struct rrpc_lun *rlun = container_of(work, struct rrpc_lun, ws_gc);
	struct rrpc *rrpc = rlun->rrpc;
	struct nvm_tgt_dev *dev = rrpc->dev;
	unsigned int target;

	target = dev->geo.blks_per_lun / GC_LIMIT_INVERSE;
	if (target < rrpc->nr_luns)
		target = rrpc->nr_luns;

	spin_lock(&rlun->lock);
	while (target > rlun->nr_free_blocks) {
		struct rrpc_block_gc *gcb;
		struct rrpc_block *victim;

		if (list_empty(&rlun->prio_list))
			break;

		victim = block_prio_find_max(rlun);
		if (!victim->nr_invalid_pages)
			break;

		/* GFP_ATOMIC: rlun->lock (a spinlock) is held here. */
		gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC);
		if (!gcb)
			break;

		list_del_init(&victim->prio);

		WARN_ON(!block_is_full(rrpc, victim));

		pr_debug("rrpc: selected block 'ch:%d,lun:%d,blk:%d' for GC\n",
					rlun->bppa.g.ch, rlun->bppa.g.lun,
					victim->id);

		gcb->rrpc = rrpc;
		gcb->rblk = victim;
		INIT_WORK(&gcb->ws_gc, rrpc_block_gc);

		queue_work(rrpc->kgc_wq, &gcb->ws_gc);

		target--;
	}
	spin_unlock(&rlun->lock);

	/* TODO: Hint that request queue can be started again */
}

/*
 * rrpc_gc_queue() - work handler that makes a block eligible for GC
 * @work: the ws_gc member embedded in a struct rrpc_block_gc
 *
 * Appends the block to its LUN's prio_list under rlun->lock and then
 * returns the work item to rrpc->gcb_pool.
 */
static void rrpc_gc_queue(struct work_struct *work)
{
	struct rrpc_block_gc *gcb;
	struct rrpc_block *rblk;
	struct rrpc_lun *rlun;
	struct rrpc *rrpc;

	gcb = container_of(work, struct rrpc_block_gc, ws_gc);
	rrpc = gcb->rrpc;
	rblk = gcb->rblk;
	rlun = rblk->rlun;

	spin_lock(&rlun->lock);
	list_add_tail(&rblk->prio, &rlun->prio_list);
	spin_unlock(&rlun->lock);

	/* gcb is not touched after this point; only the copied pointers are. */
	mempool_free(gcb, rrpc->gcb_pool);
	pr_debug("nvm: block 'ch:%d,lun:%d,blk:%d' full, allow GC (sched)\n",
					rlun->bppa.g.ch, rlun->bppa.g.lun,
					rblk->id);
}

/* Block device operations for rrpc: only the owning module is set. */
static const struct block_device_operations rrpc_fops = {
	.owner		= THIS_MODULE,
};

/*
 * rrpc_get_lun_rr() - choose the LUN for the next allocation
 * @rrpc:  the 'rrpc' structure
 * @is_gc: non-zero when the caller is the garbage collector
 *
 * Regular callers get whatever get_next_lun() returns.  GC callers get the
 * LUN with the most free blocks (the first such LUN on a tie).
 */
static struct rrpc_lun *rrpc_get_lun_rr(struct rrpc *rrpc, int is_gc)
{
	struct rrpc_lun *best;
	struct rrpc_lun *rlun;
	unsigned int i;

	if (is_gc) {
		/*
		 * During GC we don't care about RR; instead we want to keep
		 * the luns even, so that a GC-ing lun does not devour pages
		 * of a lun with few free blocks.  The lock is not taken as
		 * only an estimate is needed.
		 */
		best = &rrpc->luns[0];
		rrpc_for_each_lun(rrpc, rlun, i) {
			if (rlun->nr_free_blocks > best->nr_free_blocks)
				best = rlun;
		}

		return best;
	}

	return get_next_lun(rrpc);
}