static int start_monitoring(struct devfreq *df)
{
	int ret, mbyte;

	ret = request_threaded_irq(l2pm_irq, NULL, mon_intr_handler,
			  IRQF_ONESHOT | IRQF_SHARED,
			  "cpubw_hwmon", df);
	if (ret) {
		pr_err("Unable to register interrupt handler\n");
		return ret;
	}

	mon_init();
	mon_disable(RD_MON);
	mon_disable(WR_MON);

	mbyte = (df->previous_freq * io_percent) / (2 * 100);
	prev_r_start_val = mon_set_limit_mbyte(RD_MON, mbyte);
	prev_w_start_val = mon_set_limit_mbyte(WR_MON, mbyte);
	prev_ts = ktime_get();
	prev_ab = 0;

	mon_irq_enable(RD_MON, true);
	mon_irq_enable(WR_MON, true);
	mon_enable(RD_MON);
	mon_enable(WR_MON);
	global_mon_enable(true);

	return 0;
}
unsigned long measure_bw_and_set_irq(void)
{
	long r_mbps, w_mbps, mbps;
	ktime_t ts;
	unsigned int us;

	preempt_disable();

	ts = ktime_get();
	us = ktime_to_us(ktime_sub(ts, prev_ts));
	if (!us)
		us = 1;

	mon_disable(RD_MON);
	mon_disable(WR_MON);

	r_mbps = mon_get_count(RD_MON, prev_r_start_val);
	r_mbps = beats_to_mbps(r_mbps, us);
	w_mbps = mon_get_count(WR_MON, prev_w_start_val);
	w_mbps = beats_to_mbps(w_mbps, us);

	prev_r_start_val = mon_set_limit_mbyte(RD_MON, to_limit(r_mbps));
	prev_w_start_val = mon_set_limit_mbyte(WR_MON, to_limit(w_mbps));
	prev_ts = ts;

	mon_enable(RD_MON);
	mon_enable(WR_MON);

	preempt_enable();

	mbps = r_mbps + w_mbps;
	pr_debug("R/W/BW/us = %ld/%ld/%ld/%d\n", r_mbps, w_mbps, mbps, us);

	return mbps;
}
/* Returns MBps of read/writes for the sampling window. */
static int mon_get_mbps(int n, u32 start_val, unsigned int us)
{
	u32 overflow, count;
	long long beats;

	count = get_l2_indirect_reg(L2PMnEVCNTR(n));
	overflow = get_l2_indirect_reg(L2PMOVSR);

	if (overflow & BIT(n))
		beats = 0xFFFFFFFF - start_val + count;
	else
		beats = count - start_val;

	beats *= USEC_PER_SEC;
	beats *= bytes_per_beat;
	do_div(beats, us);
	beats = DIV_ROUND_UP_ULL(beats, MBYTE);

	pr_debug("EV%d ov: %x, cnt: %x\n", n, overflow, count);

	return beats;
}

static void do_bw_sample(struct work_struct *work);
static DECLARE_DEFERRED_WORK(bw_sample, do_bw_sample);
static struct workqueue_struct *bw_sample_wq;

static DEFINE_MUTEX(bw_lock);
static ktime_t prev_ts;
static u32 prev_r_start_val;
static u32 prev_w_start_val;

static struct msm_bus_paths bw_levels[] = {
	BW(0), BW(200),
};
static struct msm_bus_scale_pdata bw_data = {
	.usecase = bw_levels,
	.num_usecases = ARRAY_SIZE(bw_levels),
	.name = "cpubw-krait",
	.active_only = 1,
};
static u32 bus_client;
static void compute_bw(int mbps);
static irqreturn_t mon_intr_handler(int irq, void *dev_id);

#define START_LIMIT	100 /* MBps */
static int start_monitoring(void)
{
	int mb_limit;
	int ret;

	bw_sample_wq = alloc_workqueue("cpubw-krait", WQ_HIGHPRI, 0);
	if (!bw_sample_wq) {
		pr_err("Unable to alloc workqueue\n");
		return -ENOMEM;
	}

	ret = request_threaded_irq(MON_INT, NULL, mon_intr_handler,
			  IRQF_ONESHOT | IRQF_SHARED | IRQF_TRIGGER_RISING,
			  "cpubw_krait", mon_intr_handler);
	if (ret) {
		pr_err("Unable to register interrupt handler\n");
		return ret;
	}

	bus_client = msm_bus_scale_register_client(&bw_data);
	if (!bus_client) {
		pr_err("Unable to register bus client\n");
		ret = -ENODEV;
		goto bus_reg_fail;
	}

	compute_bw(START_LIMIT);

	mon_init();
	mon_disable(0);
	mon_disable(1);

	mb_limit = mult_frac(START_LIMIT, sample_ms, MSEC_PER_SEC);
	mb_limit /= 2;

	prev_r_start_val = mon_set_limit_mbyte(0, mb_limit);
	prev_w_start_val = mon_set_limit_mbyte(1, mb_limit);

	prev_ts = ktime_get();

	set_l2_indirect_reg(L2PMINTENSET, BIT(0));
	set_l2_indirect_reg(L2PMINTENSET, BIT(1));
	mon_enable(0);
	mon_enable(1);
	global_mon_enable(true);

	queue_delayed_work(bw_sample_wq, &bw_sample,
				msecs_to_jiffies(sample_ms));

	return 0;

bus_reg_fail:
	destroy_workqueue(bw_sample_wq);
	disable_irq(MON_INT);
	free_irq(MON_INT, mon_intr_handler);
	return ret;
}
static int start_bw_hwmon(struct bw_hwmon *hw, unsigned long mbps)
{
	struct bwmon *m = to_bwmon(hw);
	u32 limit;
	int ret;

	ret = request_threaded_irq(m->irq, NULL, bwmon_intr_handler,
				  IRQF_ONESHOT | IRQF_SHARED,
				  dev_name(m->dev), m);
	if (ret) {
		dev_err(m->dev, "Unable to register interrupt handler! (%d)\n",
				ret);
		return ret;
	}

	mon_disable(m);

	limit = mbps_to_bytes(mbps, hw->df->profile->polling_ms, 0);
	mon_set_limit(m, limit);

	mon_clear(m);
	mon_irq_clear(m);
	mon_irq_enable(m);
	mon_enable(m);

	return 0;
}
static unsigned long meas_bw_and_set_irq(struct bw_hwmon *hw,
					 unsigned int tol, unsigned int us)
{
	unsigned long mbps;
	u32 limit;
	unsigned int sample_ms = hw->df->profile->polling_ms;
	struct bwmon *m = to_bwmon(hw);

	mon_disable(m);

	mbps = mon_get_count(m);
	mbps = bytes_to_mbps(mbps, us);

	/*
	 * If the counter wraps on thres, don't set the thres too low.
	 * Setting it too low runs the risk of the counter wrapping around
	 * multiple times before the IRQ is processed.
	 */
	if (likely(!m->spec->wrap_on_thres))
		limit = mbps_to_bytes(mbps, sample_ms, tol);
	else
		limit = mbps_to_bytes(max(mbps, 400UL), sample_ms, tol);

	mon_set_limit(m, limit);

	mon_clear(m);
	mon_irq_clear(m);
	mon_enable(m);

	dev_dbg(m->dev, "MBps = %lu\n", mbps);
	return mbps;
}
static void measure_bw(void)
{
	int r_mbps, w_mbps, mbps;
	ktime_t ts;
	unsigned int us;

	mutex_lock(&bw_lock);

	/*
	 * Since we are stopping the counters, we don't want this short work
	 * to be interrupted by other tasks and cause the measurements to be
	 * wrong. Not blocking interrupts to avoid affecting interrupt
	 * latency and since they should be short anyway because they run in
	 * atomic context.
	 */
	preempt_disable();

	ts = ktime_get();
	us = ktime_to_us(ktime_sub(ts, prev_ts));
	if (!us)
		us = 1;

	mon_disable(0);
	mon_disable(1);

	r_mbps = mon_get_mbps(0, prev_r_start_val, us);
	w_mbps = mon_get_mbps(1, prev_w_start_val, us);

	prev_r_start_val = mon_set_limit_mbyte(0, to_limit(r_mbps));
	prev_w_start_val = mon_set_limit_mbyte(1, to_limit(w_mbps));

	mon_enable(0);
	mon_enable(1);

	preempt_enable();

	mbps = r_mbps + w_mbps;
	pr_debug("R/W/BW/us = %d/%d/%d/%d\n", r_mbps, w_mbps, mbps, us);
	compute_bw(mbps);

	prev_ts = ts;
	mutex_unlock(&bw_lock);
}
unsigned long measure_bw_and_set_irq(void)
{
	long r_mbps, w_mbps, mbps;
	ktime_t ts;
	unsigned int us;

	/*
	 * Since we are stopping the counters, we don't want this short work
	 * to be interrupted by other tasks and cause the measurements to be
	 * wrong. Not blocking interrupts to avoid affecting interrupt
	 * latency and since they should be short anyway because they run in
	 * atomic context.
	 */
	preempt_disable();

	ts = ktime_get();
	us = ktime_to_us(ktime_sub(ts, prev_ts));
	if (!us)
		us = 1;

	mon_disable(RD_MON);
	mon_disable(WR_MON);

	r_mbps = mon_get_count(RD_MON, prev_r_start_val);
	r_mbps = beats_to_mbps(r_mbps, us);
	w_mbps = mon_get_count(WR_MON, prev_w_start_val);
	w_mbps = beats_to_mbps(w_mbps, us);

	prev_r_start_val = mon_set_limit_mbyte(RD_MON, to_limit(r_mbps));
	prev_w_start_val = mon_set_limit_mbyte(WR_MON, to_limit(w_mbps));
	prev_ts = ts;

	mon_enable(RD_MON);
	mon_enable(WR_MON);

	preempt_enable();

	mbps = r_mbps + w_mbps;
	pr_debug("R/W/BW/us = %ld/%ld/%ld/%d\n", r_mbps, w_mbps, mbps, us);

	return mbps;
}
static int resume_bw_hwmon(struct bw_hwmon *hw)
{
	struct bwmon *m = to_bwmon(hw);

	mon_clear(m);
	mon_irq_enable(m);
	mon_enable(m);
	enable_irq(m->irq);

	return 0;
}
static int resume_bw_hwmon(struct bw_hwmon *hw)
{
	struct bwmon *m = to_bwmon(hw);
	int ret;

	mon_clear(m);
	mon_irq_enable(m);
	mon_enable(m);
	ret = request_threaded_irq(m->irq, NULL, bwmon_intr_handler,
				  IRQF_ONESHOT | IRQF_SHARED,
				  dev_name(m->dev), m);
	if (ret) {
		dev_err(m->dev, "Unable to register interrupt handler! (%d)\n",
				ret);
		return ret;
	}

	return 0;
}
static unsigned long meas_bw_and_set_irq(struct bw_hwmon *hw,
					 unsigned int tol, unsigned int us)
{
	unsigned long mbps;
	u32 limit;
	unsigned int sample_ms = hw->df->profile->polling_ms;
	struct bwmon *m = to_bwmon(hw);

	mon_disable(m);

	mbps = mon_get_count(m);
	mbps = bytes_to_mbps(mbps, us);
	/* + 1024 is to workaround HW design issue. Needs further tuning. */
	limit = mbps_to_bytes(mbps + 1024, sample_ms, tol);
	mon_set_limit(m, limit);

	mon_clear(m);
	mon_irq_clear(m);
	mon_enable(m);

	dev_dbg(m->dev, "MBps = %lu\n", mbps);
	return mbps;
}