Example no. 1
static int
_clone_init(struct iostash_bio *io, struct bio *clone, int is4ssd,
	    void *endiofn)
{
	int ret = 0;

	clone->bi_private = io;
	clone->bi_end_io = endiofn;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
	clone->bi_destructor = _clone_destructor;
#endif

	if (is4ssd) {
		if (io->ssd->online) {
			clone->bi_bdev = io->ssd->bdev;
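			/* map the HDD sector to its slot inside the cached fragment on the SSD */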
			BIO_SECTOR(clone) =
			    (sector_t) (io->fragnum * SCE_SCTRPERFRAG) +
			    (io->psn % SCE_SCTRPERFRAG) + IOSTASH_HEADERSCT;
			BIO_SIZE(clone) = BIO_SIZE(io->base_bio);
		} else {
			ret = -1;
		}
	}

	return ret;
}
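
A standalone sketch of the SSD sector mapping computed above. The values of SCE_SCTRPERFRAG and IOSTASH_HEADERSCT are assumptions for illustration only; the real definitions live in the iostash headers.

#include <stdio.h>
#include <stdint.h>

#define SCE_SCTRPERFRAG   256	/* assumed: sectors per fragment */
#define IOSTASH_HEADERSCT 8	/* assumed: header sectors reserved on the SSD */

typedef uint64_t sector_t;

/* Map a (fragment number, HDD physical sector) pair to an SSD sector. */
static sector_t ssd_sector(uint32_t fragnum, sector_t psn)
{
	return (sector_t)fragnum * SCE_SCTRPERFRAG	/* start of the fragment */
	    + (psn % SCE_SCTRPERFRAG)			/* offset within the fragment */
	    + IOSTASH_HEADERSCT;			/* skip the on-SSD header area */
}

int main(void)
{
	/* HDD sector 1000 cached in fragment 3: 3*256 + 232 + 8 = 1008 */
	printf("ssd sector = %llu\n", (unsigned long long)ssd_sector(3, 1000));
	return 0;
}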
Example no. 2
/*
 * bfq_bio_done(): .bio_done callback of the bfq policy
 *
 * Called after a bio is done (by request_polling_biodone of dsched).
 * This function determines whether a thread has used up its time slice;
 * if so, it sets the maybe_timeout flag in the bfq_tdio structure. Any
 * further action of that thread or the bfq scheduler will then cause
 * the thread to be expired (in bfq_queue() or in bfq_dequeue()).
 *
 * This function requires the bfq_tdio pointer of the thread that pushes
 * bp to be stored by dsched_set_bio_priv() earlier. Currently it is
 * stored when bfq_queue() is called.
 *
 * lock: none. This function CANNOT be blocked by any lock
 *
 * refcount:
 *	the corresponding tdio's refcount should decrease by 1 after
 *	this function call. The matching increase is in bfq_queue():
 *	for each bio pushed down, we increase the refcount of the pushing
 *	tdio.
 */
static void
bfq_bio_done(struct bio *bp)
{
	struct disk *dp = dsched_get_bio_dp(bp);
	struct bfq_thread_io *bfq_tdio = dsched_get_bio_priv(bp);
	struct bfq_disk_ctx *bfq_diskctx = dsched_get_disk_priv(dp);
	struct timeval tv;
	int ticks_expired;

	KKASSERT(bfq_tdio);

	dsched_thread_io_ref(&bfq_tdio->head);

	atomic_add_int(&bfq_tdio->bio_completed, 1);

	/* the tdio has already expired */
	if (bfq_tdio != bfq_diskctx->bfq_active_tdio)
		goto rtn;
	atomic_add_int(&bfq_tdio->service_received, BIO_SIZE(bp));

	/* current time */
	getmicrotime(&tv);
	bfq_tdio->last_request_done_time = tv;
	timevalsub(&tv, &bfq_tdio->service_start_time);
	ticks_expired = tvtohz_high(&tv);

	/* the thread has used up its time slice */
	if ((ticks_expired != 0x7fffffff) &&
	    (ticks_expired >= BFQ_SLICE_TIMEOUT)) {
		/*
		 * we cannot block here, so just set a flag
		 */
#if 0
		bfq_tdio->maybe_timeout = 1;
#endif
		if (atomic_cmpset_int(&bfq_tdio->maybe_timeout, 0, 1)) {
			bfq_update_avg_time_slice(bfq_diskctx, tv);
			dsched_debug(BFQ_DEBUG_VERBOSE, "BFQ: %p may time out\n", bfq_tdio);
		}
	}
rtn:
	dsched_thread_io_unref(&bfq_tdio->head); /* ref'ed in this function */
	dsched_thread_io_unref(&bfq_tdio->head); /* ref'ed in queue() */

}
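
A minimal userspace model of the refcount contract in the header comment. All names here are hypothetical; only the pairing rule is taken from the source: bfq_queue() takes one reference per pushed bio, and bfq_bio_done() drops that reference plus the one it takes for its own use of the tdio.

#include <stdatomic.h>
#include <stdio.h>

struct tdio { atomic_int refs; };

static void tdio_ref(struct tdio *t)   { atomic_fetch_add(&t->refs, 1); }
static void tdio_unref(struct tdio *t) { atomic_fetch_sub(&t->refs, 1); }

/* queue(): one reference per bio pushed down */
static void queue_bio(struct tdio *t)
{
	tdio_ref(t);
}

/* done(): drop the queue() reference plus our own working reference */
static void bio_done(struct tdio *t)
{
	tdio_ref(t);		/* protect the tdio while we touch it */
	/* ... update statistics, check for a timeout ... */
	tdio_unref(t);		/* ref'ed in this function */
	tdio_unref(t);		/* ref'ed in queue_bio() */
}

int main(void)
{
	struct tdio t = { .refs = 1 };	/* creation reference */

	queue_bio(&t);
	bio_done(&t);
	printf("refs = %d\n", atomic_load(&t.refs));	/* back to 1 */
	return 0;
}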
Example no. 3
static struct iostash_bio *_io_alloc(struct hdd_info *hdd, struct ssd_info *ssd, uint32_t fragnum,
				    struct bio *bio, sector_t psn)
{
	struct iostash_bio *io = mempool_alloc(hdd->io_pool, GFP_NOIO);

	if (io) {
		atomic_inc(&hdd->io_pending);

		io->hdd = hdd;
		io->ssd = ssd;
		io->fragnum = fragnum;
		io->base_bio = bio;
		io->psn = psn;
		io->nr_sctr = to_sector(BIO_SIZE(bio));
		io->error = 0;
		io->ssd_werr = 0;	/* SSD write error */
		atomic_set(&io->io_pending, 0);
	}
	return io;
}
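
A userspace sketch of the accounting rule implied by _io_alloc(): every successful allocation bumps a pending counter, so the release path (hypothetical here, it is not part of this excerpt) must decrement it and return the object to the pool. The pool size and all names are illustrative.

#include <stdio.h>

#define POOL_SIZE 4	/* illustrative pool size */

struct io { int in_use; };

static struct io pool[POOL_SIZE];
static int io_pending;	/* stand-in for atomic counting on hdd->io_pending */

static struct io *io_alloc(void)
{
	int i;

	for (i = 0; i < POOL_SIZE; i++) {
		if (!pool[i].in_use) {
			pool[i].in_use = 1;
			io_pending++;	/* mirrors atomic_inc(&hdd->io_pending) */
			return &pool[i];
		}
	}
	return NULL;	/* pool exhausted */
}

static void io_free(struct io *io)
{
	io->in_use = 0;
	io_pending--;	/* the release path must undo the accounting */
}

int main(void)
{
	struct io *a = io_alloc();
	struct io *b = io_alloc();

	printf("pending = %d\n", io_pending);	/* 2 */
	io_free(a);
	io_free(b);
	printf("pending = %d\n", io_pending);	/* 0 */
	return 0;
}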
Example no. 4
#if KERNEL_VERSION(4,4,0) <= LINUX_VERSION_CODE
blk_qc_t iostash_mkrequest(struct request_queue *q, struct bio *bio)
#elif LINUX_VERSION_CODE <= KERNEL_VERSION(3,1,0)
int iostash_mkrequest(struct request_queue *q, struct bio *bio)
#else
void iostash_mkrequest(struct request_queue *q, struct bio *bio)
#endif
{
	struct hdd_info *hdd;
	struct ssd_info *ssd;
	struct iostash_bio *io;
	sce_fmap_t fmap;
	uint32_t nr_sctr;
	sector_t psn;
	make_request_fn *org_mapreq = NULL;
#if KERNEL_VERSION(4,4,0) <= LINUX_VERSION_CODE
	blk_qc_t ret = BLK_QC_T_NONE;
#endif

	DBG("Got bio=%p bio->bi_rw(%lu) request at s=%lu l=%u.\n",
		bio, bio->bi_rw, BIO_SECTOR(bio), bio_sectors(bio));

	rcu_read_lock();
	hdd = hdd_search(bio);
	if (hdd) {
		atomic_inc(&hdd->nr_ref);
		org_mapreq = hdd->org_mapreq;
	}
	rcu_read_unlock();

	if (unlikely(NULL == hdd)) {
		/* we have to requeue the request; somebody was holding a
		 * dangling reference */
		ERR("Request holding a dangling make_request_fn pointer.\n");

#if KERNEL_VERSION(4,4,0) <= LINUX_VERSION_CODE
		bio->bi_error = -EAGAIN;
		return ret;
#elif LINUX_VERSION_CODE <= KERNEL_VERSION(3,1,0)
		rmb();		/* read the change in make_request_fn */
		return -EAGAIN; /* retry */
#else
		/*
		 * No retry is possible in newer kernels, since the return
		 * value of make_request_fn is no longer checked and retried
		 * when nonzero; we cannot unload the module.
		 */
		BUG();
		return;
#endif
	}

	if (!hdd->online) {
		ERR("request re-routed due to hdd not being online.\n");
		/* being unloaded, re-route */
		goto out;
	}

	hdd->request_q = q;
	/* calculate physical sector number -- offset partition information */
	psn = BIO_SECTOR(bio) + bio->bi_bdev->bd_part->start_sect;
	nr_sctr = to_sector(BIO_SIZE(bio));
	do {
		if (bio_sectors(bio) == 0)
			break;

		/* partition boundary check */
		if ((psn < hdd->part_start) ||
			((psn + nr_sctr) > hdd->part_end))
			break;

		if (bio_data_dir(bio) == WRITE) {
			gctx.st_write++;

#ifdef SCE_AWT
			/* make sure the request is only for one fragment */
			if (((psn + nr_sctr - 1) / SCE_SCTRPERFRAG) !=
				(psn / SCE_SCTRPERFRAG)) {
				sce_invalidate(hdd->lun, psn, nr_sctr);
				break;
			}
			rcu_read_lock();
			if (sce_get4write(hdd->lun, psn, nr_sctr, &fmap) 
				== SCE_SUCCESS) {
				ssd = (struct ssd_info *)fmap.cdevctx;
				atomic_inc(&ssd->nr_ref);
				rcu_read_unlock();
				if (!ssd->online) {
					sce_put4write(hdd->lun, psn,
						nr_sctr, 1);
					atomic_dec(&ssd->nr_ref);
				} else {
					io = _io_alloc(hdd, ssd, fmap.fragnum, bio, psn);
					if (NULL == io) {
						atomic_dec(&ssd->nr_ref);
						break;
					}
#if KERNEL_VERSION(4,4,0) <= LINUX_VERSION_CODE
					ret = _io_worker_run(&io->work);
#else
					_io_queue(io);
#endif
					/* lose the reference to hdd, not needed anymore */
					atomic_dec(&hdd->nr_ref);
#if KERNEL_VERSION(4,4,0) <= LINUX_VERSION_CODE
					return ret;
#elif LINUX_VERSION_CODE <= KERNEL_VERSION(3,1,0)
					return 0;
#else
					return;
#endif
				}
			} else
				rcu_read_unlock();
#else
			sce_invalidate(hdd->lun, psn, nr_sctr);
#endif
			break;
		}
		else
		{
			/* Read handling */
			gctx.st_read++;

			/* make sure the request is only for one fragment */
			if (((psn + nr_sctr - 1) / SCE_SCTRPERFRAG) !=
				(psn / SCE_SCTRPERFRAG))
				break;

			/* cache hit/miss check */
			rcu_read_lock();
			if (sce_get4read(hdd->lun, psn, nr_sctr, &fmap) != SCE_SUCCESS) {
				rcu_read_unlock();
				break;
			}
			BUG_ON(NULL == fmap.cdevctx);
			ssd = (struct ssd_info *) fmap.cdevctx;
			atomic_inc(&ssd->nr_ref);
			rcu_read_unlock();
			/* make sure the request is within the SSD limits and the SSD is online */
			if (!ssd->online || ssd->queue_max_hw_sectors < nr_sctr) {
				sce_put4read(hdd->lun, psn, nr_sctr);
				atomic_dec(&ssd->nr_ref);
				break;
			}

			/* cache hit */
			io = _io_alloc(hdd, ssd, fmap.fragnum, bio, psn);
			if (NULL == io) {
				atomic_dec(&ssd->nr_ref);
				break;
			}

#if KERNEL_VERSION(4,4,0) <= LINUX_VERSION_CODE
			ret = _io_worker_run(&io->work);
#else
			_io_queue(io);
#endif
			/* lose the reference to hdd, not needed anymore */
			atomic_dec(&hdd->nr_ref);
		}

#if KERNEL_VERSION(4,4,0) <= LINUX_VERSION_CODE
		return ret;
#elif LINUX_VERSION_CODE <= KERNEL_VERSION(3,1,0)
		return 0;
#else
		return;
#endif
	} while (0);

out:
	/* lose the reference to hdd, not needed anymore */
	atomic_dec(&hdd->nr_ref);

	return (org_mapreq) (q, bio);
}
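
A standalone version of the single-fragment test used twice above, with SCE_SCTRPERFRAG as an assumed value. A request stays within one fragment iff its first and last sectors fall into the same SCE_SCTRPERFRAG-sized window.

#include <stdio.h>
#include <stdint.h>

#define SCE_SCTRPERFRAG 256	/* assumed: sectors per fragment */

typedef uint64_t sector_t;

/* Nonzero iff the request [psn, psn + nr_sctr) crosses a fragment boundary. */
static int crosses_fragment(sector_t psn, uint32_t nr_sctr)
{
	return ((psn + nr_sctr - 1) / SCE_SCTRPERFRAG) != (psn / SCE_SCTRPERFRAG);
}

int main(void)
{
	printf("%d\n", crosses_fragment(250, 4));	/* 0: sectors 250..253 stay in fragment 0 */
	printf("%d\n", crosses_fragment(250, 8));	/* 1: sectors 250..257 reach fragment 1 */
	return 0;
}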
Example no. 5
/*
 * bfq_dequeue(): dispatch bios to the disk driver.
 *
 * This function will push as many bios as the number of free slots
 * in the tag queue.
 *
 * In the progress of dispatching, the following events may happen:
 *  - Current thread has timed out: expire the current thread for
 *    BFQ_REASON_TIMEOUT, and select a new thread to serve from the
 *    wf2q tree.
 *
 *  - Current thread runs out of its budget: Expire the current thread
 *    for BFQ_REASON_OUT_OF_BUDGET, and select a new thread to serve
 *
 *  - Current thread has no further bios in its queue: if the AS feature
 *    is turned on, the bfq scheduler sets an alarm and starts to suspend.
 *    The bfq_timeout() or bfq_queue() calls may resume the scheduler.
 *
 * Implementation note: The bios selected for dispatch are first
 * stored in the array bio_to_dispatch. After this function releases
 * all the locks it holds, it calls dsched_strategy_request_polling()
 * for each stored bio.
 *
 * With the help of bfq_disk_ctx->pending_dequeue,
 * there will be only one bfq_dequeue pending on the BFQ_LOCK.
 *
 * lock:
 *	BFQ_LOCK: protects against wf2q_augtree operations in bfq_queue()
 *	THREAD_IO_LOCK: locks the active_tdio; protects against queue
 *	insertions in bfq_queue() and protects active_tdio->budget
 *
 * refcount:
 *	If the scheduler decides to suspend, the refcount of active_tdio
 *	increases by 1. The matching decrease is in bfq_queue() and
 *	bfq_timeout().
 * blocking:
 *	May block on the disk driver lock, depending on the driver.
 *
 * Calling path:
 * The callers could be:
 *	bfq_queue(), bfq_timeout() and the registered polling function.
 *
 *	caller --> helper_msg_dequeue --lwkt_msg--> helper_thread --> me
 *
 */
void
bfq_dequeue(struct dsched_disk_ctx *diskctx)
{
	int free_slots,
	    bio_index = 0, i,
	    remaining_budget = 0; /* remaining budget of current active process */

	struct bio *bio, *bio_to_dispatch[33];
	struct bfq_thread_io *active_tdio = NULL;
	struct bfq_disk_ctx *bfq_diskctx = (struct bfq_disk_ctx *)diskctx;

	BFQ_LOCK(bfq_diskctx);
	atomic_cmpset_int(&bfq_diskctx->pending_dequeue, 1, 0);

	/*
	 * The whole scheduler is waiting for further bios
	 * from the process currently being served
	 */
	if (bfq_diskctx->bfq_blockon != NULL)
		goto rtn;

	remaining_budget = bfq_diskctx->bfq_remaining_budget;
	active_tdio = bfq_diskctx->bfq_active_tdio;
	dsched_debug(BFQ_DEBUG_VERBOSE, "BFQ: dequeue: Im in. active_tdio = %p\n", active_tdio);

	free_slots = diskctx->max_tag_queue_depth - diskctx->current_tag_queue_depth;
	KKASSERT(free_slots >= 0 && free_slots <= 32);

	if (active_tdio)
		DSCHED_THREAD_IO_LOCK(&active_tdio->head);

	while (free_slots) {
		/* Here active_tdio must be locked! */
		if (active_tdio) {
			/*
			 * the bio_done function has marked the current
			 * tdio as timed out
			 */
			if (active_tdio->maybe_timeout) {
				dsched_debug(BFQ_DEBUG_VERBOSE, "BFQ: %p time out in dequeue()\n", active_tdio);
				wf2q_update_vd(active_tdio, active_tdio->budget - remaining_budget);
				bfq_expire(bfq_diskctx, active_tdio, BFQ_REASON_TIMEOUT);

				/*
				 * There still exist bios not dispatched;
				 * reinsert the tdio into the aug-tree.
				 */
				if (active_tdio->head.qlength > 0) {
					wf2q_insert_thread_io(&bfq_diskctx->bfq_wf2q, active_tdio);
					KKASSERT(bfq_diskctx->bfq_wf2q.wf2q_tdio_count);
				}

				active_tdio->maybe_timeout = 0;
				DSCHED_THREAD_IO_UNLOCK(&active_tdio->head);
				active_tdio = NULL;
				continue;
			}

			/* select next bio to dispatch */
			/* TODO: a wiser selection */
			KKASSERT(lockstatus(&active_tdio->head.lock, curthread) == LK_EXCLUSIVE);
			bio = TAILQ_FIRST(&active_tdio->head.queue);
			dsched_debug(BFQ_DEBUG_NORMAL, "bfq: the first bio in queue of active_tdio %p is %p\n", active_tdio, bio);

			dsched_debug(BFQ_DEBUG_VERBOSE, "bfq: active_tdio %p exists, remaining budget = %d, tdio budget = %d, qlength = %d, first bio = %p, first bio cmd = %d, first bio size = %d\n", active_tdio, remaining_budget, active_tdio->budget, active_tdio->head.qlength, bio, bio?bio->bio_buf->b_cmd:-1, bio?bio->bio_buf->b_bcount:-1);

			/*
			 * The bio is not read or write, just
			 * push it down.
			 */
			if (bio && (bio->bio_buf->b_cmd != BUF_CMD_READ) &&
			    (bio->bio_buf->b_cmd != BUF_CMD_WRITE)) {
				dsched_debug(BFQ_DEBUG_NORMAL, "bfq: remove bio %p from the queue of %p\n", bio, active_tdio);
				KKASSERT(lockstatus(&active_tdio->head.lock, curthread) == LK_EXCLUSIVE);
				TAILQ_REMOVE(&active_tdio->head.queue, bio, link);
				active_tdio->head.qlength--;
				free_slots--;

#if 0
				dsched_strategy_request_polling(diskctx->dp, bio, diskctx);
#endif
				bio_to_dispatch[bio_index++] = bio;
				KKASSERT(bio_index <= bfq_diskctx->head.max_tag_queue_depth);
				continue;
			}
			/*
			 * Out of budget, and not because the size of the
			 * bio is larger than the complete budget: if the
			 * bio is larger than a complete budget, a complete
			 * budget is used to cover it.
			 */
			if (bio && (remaining_budget < BIO_SIZE(bio)) &&
			    (remaining_budget != active_tdio->budget)) {
				/* charge budget used */
				wf2q_update_vd(active_tdio, active_tdio->budget - remaining_budget);
				bfq_expire(bfq_diskctx, active_tdio, BFQ_REASON_OUT_OF_BUDGET);
				wf2q_insert_thread_io(&bfq_diskctx->bfq_wf2q, active_tdio);
				dsched_debug(BFQ_DEBUG_VERBOSE, "BFQ: thread %p ran out of budget\n", active_tdio);
				DSCHED_THREAD_IO_UNLOCK(&active_tdio->head);
				active_tdio = NULL;
			} else { /* if (bio && remaining_budget < BIO_SIZE(bio) && remaining_budget != active_tdio->budget) */

				/*
				 * Either there is enough budget, or we hold
				 * a complete budget and the bio is larger
				 * than that.
				 */
				if (bio) {
					/* dispatch */
					remaining_budget -= BIO_SIZE(bio);
					/*
					 * If the size of the first bio is
					 * larger than the whole budget, we
					 * must charge the extra part
					 */
					if (remaining_budget < 0)
						wf2q_update_vd(active_tdio, -remaining_budget); /* compensate */
					/*
					 * remaining_budget may be < 0 here;
					 * clamp it to >= 0 so the current
					 * tdio's budget never has a negative
					 * number subtracted from it
					 */
					remaining_budget = MAX(0, remaining_budget);
					dsched_debug(BFQ_DEBUG_NORMAL, "bfq: remove bio %p from the queue of %p\n", bio, active_tdio);
					KKASSERT(lockstatus(&active_tdio->head.lock, curthread) == LK_EXCLUSIVE);
					TAILQ_REMOVE(&active_tdio->head.queue, bio, link);
					free_slots--;
					active_tdio->head.qlength--;
					active_tdio->bio_dispatched++;
					wf2q_inc_tot_service(&bfq_diskctx->bfq_wf2q, BIO_SIZE(bio));
					dsched_debug(BFQ_DEBUG_VERBOSE,
					    "BFQ: %p's bio dispatched, size=%d, remaining_budget = %d\n",
					    active_tdio, BIO_SIZE(bio), remaining_budget);
#if 0
					dsched_strategy_request_polling(diskctx->dp, bio, diskctx);
#endif
					bio_to_dispatch[bio_index++] = bio;
					KKASSERT(bio_index <= bfq_diskctx->head.max_tag_queue_depth);

				} else { /* if (bio) */

					KKASSERT(active_tdio);
					/*
					 * If the AS feature is switched off,
					 * expire the tdio as well
					 */
					if ((remaining_budget <= 0) ||
					    !(bfq_diskctx->bfq_flag & BFQ_FLAG_AS) ||
					    !active_tdio->tdio_as_switch) {
						active_tdio->budget -= remaining_budget;
						wf2q_update_vd(active_tdio, active_tdio->budget);
						bfq_expire(bfq_diskctx, active_tdio, BFQ_REASON_OUT_OF_BUDGET);
						DSCHED_THREAD_IO_UNLOCK(&active_tdio->head);
						active_tdio = NULL;
					} else {

						/* no further bio, wait for a while */
						bfq_diskctx->bfq_blockon = active_tdio;
						/*
						 * Increase ref count to ensure that
						 * tdio will not be destroyed during waiting.
						 */
						dsched_thread_io_ref(&active_tdio->head);
						/*
						 * If the tdio is seeky but not thinking for
						 * too long, we wait for it a little less
						 */
						if (active_tdio->seek_samples >= BFQ_VALID_MIN_SAMPLES && BFQ_TDIO_SEEKY(active_tdio))
							callout_reset(&bfq_diskctx->bfq_callout, BFQ_T_WAIT_MIN, (void (*) (void *))helper_msg_as_timeout, bfq_diskctx);
						else
							callout_reset(&bfq_diskctx->bfq_callout, BFQ_T_WAIT, (void (*) (void *))helper_msg_as_timeout, bfq_diskctx);

						/* save the start time of blocking */
						getmicrotime(&active_tdio->as_start_time);

						dsched_debug(BFQ_DEBUG_VERBOSE, "BFQ: blocked on %p, remaining_budget = %d\n", active_tdio, remaining_budget);
						DSCHED_THREAD_IO_UNLOCK(&active_tdio->head);
						goto save_and_rtn;
					}
				}
			}
		} else { /* if (active_tdio) */
			/* no active tdio: pick the next one from the tree */
			active_tdio = wf2q_get_next_thread_io(&bfq_diskctx->bfq_wf2q);

			if (!active_tdio) {
				/* no pending bios at all */
				KKASSERT(bfq_diskctx->bfq_wf2q.wf2q_tdio_count == 0);
				dsched_debug(BFQ_DEBUG_VERBOSE, "BFQ: no more eligible tdio!\n");
				goto save_and_rtn;
			}

			/*
			 * A new tdio is picked;
			 * initialize its service-related statistics
			 */
			DSCHED_THREAD_IO_LOCK(&active_tdio->head);
			active_tdio->service_received = 0;

			/*
			 * Reset the maybe_timeout flag, which may have been
			 * set by a biodone after the previous service was done
			 */
			getmicrotime(&active_tdio->service_start_time);
			active_tdio->maybe_timeout = 0;

			remaining_budget = active_tdio->budget;
			dsched_debug(BFQ_DEBUG_VERBOSE, "bfq: active_tdio %p selected, remaining budget = %d, tdio budget = %d, qlength = %d\n", active_tdio, remaining_budget, active_tdio->budget, active_tdio->head.qlength);
		}

	}/* while (free_slots) */

	/* reach here only when free_slots == 0 */
	if (active_tdio) /* && lockcount(&active_tdio->head.lock) > 0) */
		DSCHED_THREAD_IO_UNLOCK(&active_tdio->head);

save_and_rtn:
	/* save the remaining budget */
	bfq_diskctx->bfq_remaining_budget = remaining_budget;
	bfq_diskctx->bfq_active_tdio = active_tdio;
rtn:
	BFQ_UNLOCK(bfq_diskctx);
	/* dispatch the planned bios */
	for (i = 0; i < bio_index; i++)
		dsched_strategy_request_polling(diskctx->dp, bio_to_dispatch[i], diskctx);

}
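
A userspace sketch of the budget accounting in the dispatch branch above: charge() stands in for the wf2q_update_vd() compensation of an oversized first bio, and the clamp mirrors remaining_budget = MAX(0, remaining_budget). The numbers are illustrative.

#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))

static int vd;	/* virtual time charged so far (stand-in for wf2q_update_vd) */

static void charge(int amount)
{
	vd += amount;
}

/* Account one dispatched bio of `size` against the remaining budget. */
static int dispatch(int remaining_budget, int size)
{
	remaining_budget -= size;
	/* the bio is larger than the whole budget: charge the extra part */
	if (remaining_budget < 0)
		charge(-remaining_budget);
	/* never let a negative remainder be subtracted from the budget later */
	return MAX(0, remaining_budget);
}

int main(void)
{
	int left;

	/* budget 100, bio of 60: 40 left, nothing extra charged */
	left = dispatch(100, 60);
	printf("left = %d, vd = %d\n", left, vd);

	/* budget 100, oversized bio of 130: clamped to 0, extra 30 charged */
	left = dispatch(100, 130);
	printf("left = %d, vd = %d\n", left, vd);
	return 0;
}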