/*
 * simp_blkdev_make_request
 */
static void simp_blkdev_make_request(struct request_queue *q, struct bio *bio)
{
    struct bio_vec *bvec;
    int i;
    void *dsk_mem;

    /* Check whether the I/O request goes beyond the actual RAM disk size */
    if ((bio->bi_sector << 9) + bio->bi_size > SIMP_BLKDEV_BYTES) {
        printk(KERN_ERR SIMP_BLKDEV_DISKNAME
               ": bad request: block=%llu, count=%u\n",
               (unsigned long long)bio->bi_sector, bio->bi_size);
        /* Conditional compilation: bio_endio() takes one extra argument
         * on kernels older than 2.6.24 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
        /* Report the completion status of this bio; -EIO means error */
        bio_endio(bio, 0, -EIO);
#else
        bio_endio(bio, -EIO);
#endif
        return;
    }

    dsk_mem = simp_blkdev_data + (bio->bi_sector << 9);

    /* Iterate over every bvec in the bio */
    bio_for_each_segment(bvec, bio, i) {
        void *iovec_mem;

        switch (bio_rw(bio)) {
        case READ:
        case READA:
            /* Reads and read-aheads are handled the same way. */
            /* Use kmap() to map the request page into the kernel's
             * non-linear mapping area before accessing it; this keeps
             * the code compatible with high memory (bvec->bv_page may
             * come from highmem). */
            iovec_mem = kmap(bvec->bv_page) + bvec->bv_offset;
            memcpy(iovec_mem, dsk_mem, bvec->bv_len);
            kunmap(bvec->bv_page);
            break;
        case WRITE:
            iovec_mem = kmap(bvec->bv_page) + bvec->bv_offset;
            memcpy(dsk_mem, iovec_mem, bvec->bv_len);
            kunmap(bvec->bv_page);
            break;
        default:
            printk(KERN_ERR SIMP_BLKDEV_DISKNAME
                   ": unknown value of bio_rw: %lu\n",
                   bio_rw(bio));
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
            bio_endio(bio, 0, -EIO);
#else
            bio_endio(bio, -EIO);
#endif
            return;
        }

        dsk_mem += bvec->bv_len;
    }

    /* All segments handled: complete the bio successfully. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
    bio_endio(bio, bio->bi_size, 0);
#else
    bio_endio(bio, 0);
#endif
    return;
}
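For context, a make_request-style driver like the one above bypasses the I/O scheduler entirely, so the handler has to be attached to the queue at module init. Below is a minimal init sketch under that assumption; the queue/disk variables, SIMP_BLKDEV_DEVICEMAJOR and simp_blkdev_fops are illustrative names, not taken from the excerpt.

#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>

static struct request_queue *simp_blkdev_queue;
static struct gendisk *simp_blkdev_disk;

static int __init simp_blkdev_init(void)
{
    int ret;

    /* No request-queue scheduling: bios go straight to the handler. */
    simp_blkdev_queue = blk_alloc_queue(GFP_KERNEL);
    if (!simp_blkdev_queue)
        return -ENOMEM;
    blk_queue_make_request(simp_blkdev_queue, simp_blkdev_make_request);

    simp_blkdev_disk = alloc_disk(1);
    if (!simp_blkdev_disk) {
        ret = -ENOMEM;
        goto err_alloc_disk;
    }

    strcpy(simp_blkdev_disk->disk_name, SIMP_BLKDEV_DISKNAME);
    simp_blkdev_disk->major = SIMP_BLKDEV_DEVICEMAJOR;  /* assumed macro */
    simp_blkdev_disk->first_minor = 0;
    simp_blkdev_disk->fops = &simp_blkdev_fops;         /* assumed fops */
    simp_blkdev_disk->queue = simp_blkdev_queue;
    set_capacity(simp_blkdev_disk, SIMP_BLKDEV_BYTES >> 9);
    add_disk(simp_blkdev_disk);

    return 0;

err_alloc_disk:
    blk_cleanup_queue(simp_blkdev_queue);
    return ret;
}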
static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
{
    struct request_queue *q = dev->q;
    struct request *rq;
    struct bio *bio = rqd->bio;

    rq = blk_mq_alloc_request(q, bio_rw(bio), 0);
    if (IS_ERR(rq))
        return -ENOMEM;

    rq->cmd_type = REQ_TYPE_DRV_PRIV;
    rq->__sector = bio->bi_iter.bi_sector;
    rq->ioprio = bio_prio(bio);

    if (bio_has_data(bio))
        rq->nr_phys_segments = bio_phys_segments(q, bio);

    rq->__data_len = bio->bi_iter.bi_size;
    rq->bio = rq->biotail = bio;

    rq->end_io_data = rqd;

    blk_execute_rq_nowait(q, NULL, rq, 0, null_lnvm_end_io);

    return 0;
}
static int Virtual_blkdev_make_request(struct request_queue *q, struct bio *bio)
{
    struct bio_vec *bvec;
    int i;
    void *dsk_mem;

    if ((bio->bi_sector << 9) + bio->bi_size > VIRTUAL_BLKDEV_BYTES) {
        printk(KERN_ERR VIRTUAL_BLKDEV_DISKNAME
               ": bad request: block=%llu, count=%u\n",
               (unsigned long long)bio->bi_sector, bio->bi_size);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
        bio_endio(bio, 0, -EIO);
#else
        bio_endio(bio, -EIO);
#endif
        return 0;
    }

    dsk_mem = Virtual_blkdev_data + (bio->bi_sector << 9);

    bio_for_each_segment(bvec, bio, i) {
        void *iovec_mem;

        switch (bio_rw(bio)) {
        case READ:
        case READA:
            iovec_mem = kmap(bvec->bv_page) + bvec->bv_offset;
            memcpy(iovec_mem, dsk_mem, bvec->bv_len);
            kunmap(bvec->bv_page);
            break;
        case WRITE:
            iovec_mem = kmap(bvec->bv_page) + bvec->bv_offset;
            memcpy(dsk_mem, iovec_mem, bvec->bv_len);
            kunmap(bvec->bv_page);
            break;
        default:
            printk(KERN_ERR VIRTUAL_BLKDEV_DISKNAME
                   ": unknown value of bio_rw: %lu\n",
                   bio_rw(bio));
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
            bio_endio(bio, 0, -EIO);
#else
            bio_endio(bio, -EIO);
#endif
            return 0;
        }

        dsk_mem += bvec->bv_len;
    }

    /* All segments handled: complete the bio successfully. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
    bio_endio(bio, bio->bi_size, 0);
#else
    bio_endio(bio, 0);
#endif
    return 0;
}
/*
 * The request function that just remaps the bio built up by
 * dm_merge_bvec.
 */
static int dm_request(request_queue_t *q, struct bio *bio)
{
    int r;
    int rw = bio_data_dir(bio);
    struct mapped_device *md = q->queuedata;

    /*
     * There is no use in forwarding any barrier request since we can't
     * guarantee it is (or can be) handled by the targets correctly.
     */
    if (unlikely(bio_barrier(bio))) {
        bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
        return 0;
    }

    down_read(&md->io_lock);

    disk_stat_inc(dm_disk(md), ios[rw]);
    disk_stat_add(dm_disk(md), sectors[rw], bio_sectors(bio));

    /*
     * If we're suspended we have to queue
     * this io for later.
     */
    while (test_bit(DMF_BLOCK_IO, &md->flags)) {
        up_read(&md->io_lock);

        if (bio_rw(bio) == READA) {
            bio_io_error(bio, bio->bi_size);
            return 0;
        }

        r = queue_io(md, bio);
        if (r < 0) {
            bio_io_error(bio, bio->bi_size);
            return 0;
        } else if (r == 0)
            return 0;   /* deferred successfully */

        /*
         * We're in a while loop, because someone could suspend
         * before we get to the following read lock.
         */
        down_read(&md->io_lock);
    }

    __split_bio(md, bio);
    up_read(&md->io_lock);
    return 0;
}
static int rrpc_setup_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd,
                         unsigned long flags, uint8_t npages)
{
    if (npages > 1) {
        rqd->ppa_list = nvm_dev_dma_alloc(rrpc->dev, GFP_KERNEL,
                                          &rqd->dma_ppa_list);
        if (!rqd->ppa_list) {
            pr_err("rrpc: not able to allocate ppa list\n");
            return NVM_IO_ERR;
        }

        if (bio_rw(bio) == WRITE)
            return rrpc_write_ppalist_rq(rrpc, bio, rqd, flags, npages);

        return rrpc_read_ppalist_rq(rrpc, bio, rqd, flags, npages);
    }

    if (bio_rw(bio) == WRITE)
        return rrpc_write_rq(rrpc, bio, rqd, flags);

    return rrpc_read_rq(rrpc, bio, rqd, flags);
}
/*
 * Return zeros only on reads
 */
static int zero_map(struct dm_target *ti, struct bio *bio)
{
    switch (bio_rw(bio)) {
    case READ:
        zero_fill_bio(bio);
        break;
    case READA:
        /* readahead of null bytes only wastes buffer cache */
        return -EIO;
    case WRITE:
        /* writes get silently dropped */
        break;
    }

    bio_endio(bio, 0);

    /* accepted bio, don't make new request */
    return DM_MAPIO_SUBMITTED;
}
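zero_map() above only runs once its target type has been registered with device-mapper. The following is a minimal registration sketch, assuming the newer two-argument .map prototype shown above; the constructor body, version triple and module boilerplate are illustrative, not taken from the excerpt.

#include <linux/module.h>
#include <linux/init.h>
#include <linux/device-mapper.h>

/* Illustrative constructor: a zero target takes no table arguments. */
static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
    if (argc != 0) {
        ti->error = "No arguments required";
        return -EINVAL;
    }
    return 0;
}

static struct target_type zero_target = {
    .name    = "zero",
    .version = {1, 0, 0},       /* illustrative version triple */
    .module  = THIS_MODULE,
    .ctr     = zero_ctr,
    .map     = zero_map,
};

static int __init dm_zero_init(void)
{
    int r = dm_register_target(&zero_target);

    if (r < 0)
        printk(KERN_ERR "dm-zero: register failed %d\n", r);
    return r;
}

static void __exit dm_zero_exit(void)
{
    dm_unregister_target(&zero_target);
}

module_init(dm_zero_init);
module_exit(dm_zero_exit);
MODULE_LICENSE("GPL");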
/*
 * The request function that just remaps the bio built up by
 * dm_merge_bvec.
 */
static int dm_request(request_queue_t *q, struct bio *bio)
{
    int r;
    int rw = bio_data_dir(bio);
    struct mapped_device *md = q->queuedata;

    down_read(&md->io_lock);

    disk_stat_inc(dm_disk(md), ios[rw]);
    disk_stat_add(dm_disk(md), sectors[rw], bio_sectors(bio));

    /*
     * If we're suspended we have to queue
     * this io for later.
     */
    while (test_bit(DMF_BLOCK_IO, &md->flags)) {
        up_read(&md->io_lock);

        if (bio_rw(bio) == READA) {
            bio_io_error(bio, bio->bi_size);
            return 0;
        }

        r = queue_io(md, bio);
        if (r < 0) {
            bio_io_error(bio, bio->bi_size);
            return 0;
        } else if (r == 0)
            return 0;   /* deferred successfully */

        /*
         * We're in a while loop, because someone could suspend
         * before we get to the following read lock.
         */
        down_read(&md->io_lock);
    }

    __split_bio(md, bio);
    up_read(&md->io_lock);
    return 0;
}
/*
 * Return zeros only on reads
 */
static int zero_map(struct dm_target *ti, struct bio *bio,
                    union map_info *map_context)
{
    switch (bio_rw(bio)) {
    case READ:
        zero_fill_bio(bio);
        break;
    case READA:
        /* readahead of null bytes only wastes buffer cache */
        return -EIO;
    case WRITE:
        /* writes get silently dropped */
        break;
    }

    bio_endio(bio, bio->bi_size, 0);

    /* accepted bio, don't make new request */
    return 0;
}
/* read, readA or write requests on R_PRIMARY coming from drbd_make_request */
void drbd_endio_pri(struct bio *bio, int error)
{
    unsigned long flags;
    struct drbd_request *req = bio->bi_private;
    struct drbd_conf *mdev = req->mdev;
    struct bio_and_error m;
    enum drbd_req_event what;
    int uptodate = bio_flagged(bio, BIO_UPTODATE);

    if (error)
        dev_warn(DEV, "p %s: error=%d\n",
                 bio_data_dir(bio) == WRITE ? "write" : "read", error);
    if (!error && !uptodate) {
        dev_warn(DEV, "p %s: setting error to -EIO\n",
                 bio_data_dir(bio) == WRITE ? "write" : "read");
        /* strange behavior of some lower level drivers...
         * fail the request by clearing the uptodate flag,
         * but do not return any error?! */
        error = -EIO;
    }

    /* to avoid recursion in __req_mod */
    if (unlikely(error)) {
        what = (bio_data_dir(bio) == WRITE)
            ? write_completed_with_error
            : (bio_rw(bio) == READ)
              ? read_completed_with_error
              : read_ahead_completed_with_error;
    } else
        what = completed_ok;

    bio_put(req->private_bio);
    req->private_bio = ERR_PTR(error);

    spin_lock_irqsave(&mdev->req_lock, flags);
    __req_mod(req, what, &m);
    spin_unlock_irqrestore(&mdev->req_lock, flags);

    if (m.bio)
        complete_master_bio(mdev, &m);
}
static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
{
    struct request_queue *q = dev->q;
    struct nvme_ns *ns = q->queuedata;
    struct request *rq;
    struct bio *bio = rqd->bio;
    struct nvme_nvm_command *cmd;

    rq = blk_mq_alloc_request(q, bio_rw(bio), 0);
    if (IS_ERR(rq))
        return -ENOMEM;

    cmd = kzalloc(sizeof(struct nvme_nvm_command) +
                  sizeof(struct nvme_nvm_completion), GFP_KERNEL);
    if (!cmd) {
        blk_mq_free_request(rq);
        return -ENOMEM;
    }

    rq->cmd_type = REQ_TYPE_DRV_PRIV;
    rq->ioprio = bio_prio(bio);

    if (bio_has_data(bio))
        rq->nr_phys_segments = bio_phys_segments(q, bio);

    rq->__data_len = bio->bi_iter.bi_size;
    rq->bio = rq->biotail = bio;

    nvme_nvm_rqtocmd(rq, rqd, ns, cmd);

    rq->cmd = (unsigned char *)cmd;
    rq->cmd_len = sizeof(struct nvme_nvm_command);
    rq->special = cmd + 1;

    rq->end_io_data = rqd;

    blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_io);

    return 0;
}
static int sbd_make_request(struct request_queue *q, struct bio *bio)
{
    struct bio_vec *bvec;
    int i;
    unsigned long long dsk_offset;

    dsk_offset = bio->bi_sector * 512;

    bio_for_each_segment(bvec, bio, i) {
        unsigned int count_done, count_current;
        void *dsk_mem;
        void *iovec_mem;

        iovec_mem = kmap(bvec->bv_page) + bvec->bv_offset;

        count_done = 0;
        while (count_done < bvec->bv_len) {
            /* Copy at most up to the next page boundary of the
             * backing store in each pass. */
            count_current = min(bvec->bv_len - count_done,
                (unsigned int)(PAGE_SIZE -
                (dsk_offset + count_done) % PAGE_SIZE));

            /* Assumes the backing page was preallocated and inserted
             * into the radix tree, so the lookup cannot return NULL. */
            dsk_mem = radix_tree_lookup(&sbd_data,
                (dsk_offset + count_done) / PAGE_SIZE);
            dsk_mem += (dsk_offset + count_done) % PAGE_SIZE;

            switch (bio_rw(bio)) {
            case READ:
            case READA:
                memcpy(iovec_mem + count_done, dsk_mem, count_current);
                break;
            case WRITE:
                memcpy(dsk_mem, iovec_mem + count_done, count_current);
                break;
            }

            count_done += count_current;
        }

        kunmap(bvec->bv_page);
        dsk_offset += bvec->bv_len;
    }

    bio_endio(bio, 0);
    return 0;
}
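Note that sbd_make_request() above never checks the result of radix_tree_lookup(), so it only works if every backing page was inserted into sbd_data beforehand (the later blkdev_make_request example does handle a missing page). Below is a minimal pre-allocation sketch under that assumption; SBD_BYTES and sbd_alloc_pages() are illustrative names, not taken from the excerpt.

#include <linux/gfp.h>
#include <linux/radix-tree.h>

static RADIX_TREE(sbd_data, GFP_KERNEL);    /* matches the lookup above */

/* Illustrative: back every PAGE_SIZE slice of the virtual disk with one
 * kernel page, indexed by page number, before any bio can arrive. */
static int sbd_alloc_pages(void)
{
    unsigned long i, npages = (SBD_BYTES + PAGE_SIZE - 1) >> PAGE_SHIFT;
    void *p;

    for (i = 0; i < npages; i++) {
        p = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
        if (!p)
            goto err_free;
        /* radix_tree_insert() may itself fail with -ENOMEM. */
        if (radix_tree_insert(&sbd_data, i, p)) {
            free_page((unsigned long)p);
            goto err_free;
        }
    }
    return 0;

err_free:
    /* A real driver would walk the tree and free what was inserted. */
    return -ENOMEM;
}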
int simp_blkdev_make_request(struct request_queue *q, struct bio *bio)
{
    struct bio_vec *bvec;
    int i;
    void *disk_mem;

    if ((bio->bi_sector << 9) + bio->bi_size > DEV_CAPACITY) {
        printk("simp_blkdev: bad request: S=%lu, nS=%u\n",
               (unsigned long)bio->bi_sector, bio->bi_size);
        bio_endio(bio, -EIO);
        return 0;
    }

    disk_mem = simp_blkdev_data + (bio->bi_sector << 9);

    bio_for_each_segment(bvec, bio, i) {
        void *iovec_mem;

        switch (bio_rw(bio)) {
        case READ:
        case READA: /* read-ahead */
            iovec_mem = kmap(bvec->bv_page) + bvec->bv_offset;
            memcpy(iovec_mem, disk_mem, bvec->bv_len);
            kunmap(bvec->bv_page);
            break;
        case WRITE:
            iovec_mem = kmap(bvec->bv_page) + bvec->bv_offset;
            memcpy(disk_mem, iovec_mem, bvec->bv_len);
            kunmap(bvec->bv_page);
            break;
        default:
            printk("blkdev: unsupported bio type\n");
            bio_endio(bio, -EIO);
            return 0;
        }

        disk_mem += bvec->bv_len;
    }

    /* All segments handled: complete the bio successfully. */
    bio_endio(bio, 0);
    return 0;
}
static int simpleblk_make_request(struct request_queue *q, struct bio *bio)
{
    struct bio_vec *bvec;
    int i;
    void *dsk_mem;

    if ((bio->bi_sector << 9) + bio->bi_size > MEMBLK_BYTES) {
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)) /* XXX not very accurate */
        bio_endio(bio, 0, -EIO);
#else
        bio_endio(bio, -EIO);
#endif
        return -EFAULT;
    }

    /* bio->bi_sector is a sector offset, not a real address. */
    dsk_mem = blkdev_data + (bio->bi_sector << 9);

    switch (bio_rw(bio)) {
    case READ:
    case READA:
        bio_for_each_segment(bvec, bio, i) {
            void *iovec_mem;

            iovec_mem = kmap(bvec->bv_page) + bvec->bv_offset;
            memcpy(iovec_mem, dsk_mem, bvec->bv_len);
            kunmap(bvec->bv_page);
            dsk_mem += bvec->bv_len;
        }
        break;
    case WRITE:
        bio_for_each_segment(bvec, bio, i) {
            void *iovec_mem;

            iovec_mem = kmap(bvec->bv_page) + bvec->bv_offset;
            memcpy(dsk_mem, iovec_mem, bvec->bv_len);
            kunmap(bvec->bv_page);
            dsk_mem += bvec->bv_len;
        }
        break;
    }

    /* All segments handled: complete the bio successfully. */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24))
    bio_endio(bio, bio->bi_size, 0);
#else
    bio_endio(bio, 0);
#endif
    return 0;
}
/* Helper for __req_mod().
 * Set m->bio to the master bio, if it is fit to be completed,
 * or leave it alone (it is initialized to NULL in __req_mod),
 * if it has already been completed, or cannot be completed yet.
 * If m->bio is set, the error status to be returned is placed in m->error.
 */
static void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
{
    const unsigned s = req->rq_state;
    struct drbd_conf *mdev = req->w.mdev;
    int rw;
    int error, ok;

    /* we must not complete the master bio, while it is
     *  still being processed by _drbd_send_zc_bio (drbd_send_dblock)
     *  not yet acknowledged by the peer
     *  not yet completed by the local io subsystem
     * these flags may get cleared in any order by
     *  the worker,
     *  the receiver,
     *  the bio_endio completion callbacks.
     */
    if ((s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) ||
        (s & RQ_NET_QUEUED) || (s & RQ_NET_PENDING) ||
        (s & RQ_COMPLETION_SUSP)) {
        dev_err(DEV, "drbd_req_complete: Logic BUG rq_state = 0x%x\n", s);
        return;
    }

    if (!req->master_bio) {
        dev_err(DEV, "drbd_req_complete: Logic BUG, master_bio == NULL!\n");
        return;
    }

    rw = bio_rw(req->master_bio);

    /*
     * figure out whether to report success or failure.
     *
     * report success when at least one of the operations succeeded.
     * or, to put the other way,
     * only report failure, when both operations failed.
     *
     * what to do about the failures is handled elsewhere.
     * what we need to do here is just: complete the master_bio.
     *
     * local completion error, if any, has been stored as ERR_PTR
     * in private_bio within drbd_request_endio.
     */
    ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK);
    error = PTR_ERR(req->private_bio);

    /* remove the request from the conflict detection
     * respective block_id verification hash */
    if (!drbd_interval_empty(&req->i)) {
        struct rb_root *root;

        if (rw == WRITE)
            root = &mdev->write_requests;
        else
            root = &mdev->read_requests;
        drbd_remove_request_interval(root, req);
    }

    /* Before we can signal completion to the upper layers,
     * we may need to close the current transfer log epoch.
     * We are within the request lock, so we can simply compare
     * the request epoch number with the current transfer log
     * epoch number. If they match, increase the current_tle_nr,
     * and reset the transfer log epoch write_cnt.
     */
    if (rw == WRITE &&
        req->epoch == atomic_read(&mdev->tconn->current_tle_nr))
        start_new_tl_epoch(mdev->tconn);

    /* Update disk stats */
    _drbd_end_io_acct(mdev, req);

    /* If READ failed,
     * have it be pushed back to the retry work queue,
     * so it will re-enter __drbd_make_request(),
     * and be re-assigned to a suitable local or remote path,
     * or failed if we do not have access to good data anymore.
     *
     * Unless it was failed early by __drbd_make_request(),
     * because no path was available, in which case
     * it was not even added to the transfer_log.
     *
     * READA may fail, and will not be retried.
     *
     * WRITE should have used all available paths already.
     */
    if (!ok && rw == READ && !list_empty(&req->tl_requests))
        req->rq_state |= RQ_POSTPONED;

    if (!(req->rq_state & RQ_POSTPONED)) {
        m->error = ok ? 0 : (error ?: -EIO);
        m->bio = req->master_bio;
        req->master_bio = NULL;
    }
}
/*
static int blkdev_make_request(struct request_queue *q, struct bio *bio)
{
    struct bio_vec *bvec;
    int i;
    unsigned long long dsk_offset;

    if ((bio->bi_sector << PAGE_SHIFT) + bio->bi_size > BLKDEV_BYTES) {
        printk(KERN_ERR "bad request!bi_sector:%llu,bi_size:%u\n",
               (unsigned long)bio->bi_sector, bio->bi_size);
        bio_endio(bio, -EIO);
    }

    dsk_offset = bio->bi_sector << PAGE_SHIFT;

    bio_for_each_segment(bvec, bio, i) {
        unsigned int count_done, count_current;
        void *iovec_mem;
        void *dsk_mem;

        iovec_mem = kmap(bvec->bv_page) + bvec->bv_offset;

        count_done = 0;
        while (count_done < bvec->bv_len) {
            count_current = min(bvec->bv_len - count_done,
                (unsigned int)(PAGE_SIZE -
                (dsk_offset + count_done) % PAGE_SIZE));

            dsk_mem = radix_tree_lookup(&blkdev_data,
                (dsk_offset + count_done) % PAGE_SIZE);
            dsk_mem += (dsk_offset + count_done) % PAGE_SIZE;

            switch (bio_rw(bio)) {
            case READ:
            case READA:
                memcpy(iovec_mem + count_done, dsk_mem, count_current);
                break;
            case WRITE:
                memcpy(dsk_mem, iovec_mem + count_done, count_current);
            }

            count_done += count_current;
        }

        kunmap(bvec->bv_page);
        dsk_offset += bvec->bv_len;
    }

    bio_endio(bio, 0);
    return 0;
}
*/

static int blkdev_make_request(struct request_queue *q, struct bio *bio)
{
    struct bio_vec *bvec;
    int i;
    unsigned long long dsk_offset;

    if ((bio->bi_sector << 9) + bio->bi_size > BLKDEV_BYTES) {
        printk(KERN_ERR BLKDEV_DISKNAME
               ": bad request: block=%llu, count=%u\n",
               (unsigned long long)bio->bi_sector, bio->bi_size);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
        bio_endio(bio, 0, -EIO);
#else
        bio_endio(bio, -EIO);
#endif
        return 0;
    }

    dsk_offset = bio->bi_sector << 9;

    bio_for_each_segment(bvec, bio, i) {
        unsigned int count_done, count_current;
        void *iovec_mem;
        void *dsk_mem;

        iovec_mem = kmap(bvec->bv_page) + bvec->bv_offset;

        count_done = 0;
        while (count_done < bvec->bv_len) {
            count_current = min(bvec->bv_len - count_done,
                (unsigned int)(PAGE_SIZE -
                ((dsk_offset + count_done) & ~PAGE_MASK)));

            dsk_mem = radix_tree_lookup(&blkdev_data,
                (dsk_offset + count_done) >> PAGE_SHIFT);
            if (!dsk_mem) {
                printk(KERN_ERR BLKDEV_DISKNAME
                       ": search memory failed: %llu\n",
                       (dsk_offset + count_done) >> PAGE_SHIFT);
                kunmap(bvec->bv_page);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
                bio_endio(bio, 0, -EIO);
#else
                bio_endio(bio, -EIO);
#endif
                return 0;
            }
            dsk_mem += (dsk_offset + count_done) & ~PAGE_MASK;

            switch (bio_rw(bio)) {
            case READ:
            case READA:
                memcpy(iovec_mem + count_done, dsk_mem, count_current);
                break;
            case WRITE:
                memcpy(dsk_mem, iovec_mem + count_done, count_current);
                break;
            default:
                printk(KERN_ERR BLKDEV_DISKNAME
                       ": unknown value of bio_rw: %lu\n",
                       bio_rw(bio));
                kunmap(bvec->bv_page);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
                bio_endio(bio, 0, -EIO);
#else
                bio_endio(bio, -EIO);
#endif
                return 0;
            }

            count_done += count_current;
        }

        kunmap(bvec->bv_page);
        dsk_offset += bvec->bv_len;
    }

    /* All segments handled: complete the bio successfully. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
    bio_endio(bio, bio->bi_size, 0);
#else
    bio_endio(bio, 0);
#endif
    return 0;
}