void kbase_pm_request_cores(kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores)
{
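	/* Request use of the shader cores in shader_cores and, if tiler_required,
	 * of the tiler: bump the per-core shader_needed reference counts and
	 * tiler_needed_cnt, and re-evaluate the desired core state if any core
	 * becomes newly needed. Takes kbdev->pm.power_change_lock internally, so
	 * the caller must not already hold it. */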
	unsigned long flags;
	u64 cores;

	kbase_pm_change_state change_gpu_state = 0u;

	KBASE_DEBUG_ASSERT(kbdev != NULL);

	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);

	cores = shader_cores;
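	/* Walk the requested mask one core at a time, from the highest set bit
	 * downwards, clearing each bit once its reference count has been taken. */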
	while (cores) {
		int bitnum = fls64(cores) - 1;
		u64 bit = 1ULL << bitnum;

		/* It should be almost impossible for this to overflow. It would require 2^32 atoms
		 * to request a particular core, which would require 2^24 contexts to submit. This
		 * would require an amount of memory that is impossible on a 32-bit system and
		 * extremely unlikely on a 64-bit system. */
		int cnt = ++kbdev->shader_needed_cnt[bitnum];

		if (1 == cnt) {
			kbdev->shader_needed_bitmap |= bit;
			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
		}

		cores &= ~bit;
	}

	if (tiler_required != MALI_FALSE) {
		++kbdev->tiler_needed_cnt;

		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);

		/* For tiler jobs, we must make sure that core 0 is not turned off if it's already on.
		 * However, it's safe for core 0 to be left off and turned on later whilst a tiler job
		 * is running. Hence, we don't need to update the cores state immediately. Also,
		 * attempts to turn off cores will always check the tiler_needed/inuse state first anyway.
		 *
		 * Finally, kbase_js_choose_affinity() ensures core 0 is always requested for tiler jobs
		 * anyway. Hence when there's only a tiler job in the system, this will still cause
		 * kbase_pm_update_cores_state_nolock() to be called.
		 *
		 * Note that we still need to keep track of tiler_needed/inuse_cnt, to ensure that
		 * kbase_pm_update_cores_state_nolock() can override the core availability policy and
		 * force core 0 to be powered when a tiler job is in the system. */
	}

	if (change_gpu_state) {
		KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL, NULL, 0u, (u32) kbdev->shader_needed_bitmap);

		kbase_timeline_pm_cores_func(kbdev, KBASE_PM_FUNC_ID_REQUEST_CORES_START, change_gpu_state);
		kbase_pm_update_cores_state_nolock(kbdev);
		kbase_timeline_pm_cores_func(kbdev, KBASE_PM_FUNC_ID_REQUEST_CORES_END, change_gpu_state);
	}

	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
}
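
/* Inform the core availability code that instrumentation has been disabled:
 * clear kbdev->pm.instr_enabled under power_change_lock and re-evaluate the
 * desired core state. */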
void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
{
	unsigned long flags;

	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
	kbdev->pm.instr_enabled = MALI_FALSE;

	kbase_pm_update_cores_state_nolock(kbdev);
	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
}
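
/* Convenience wrapper around kbase_pm_update_cores_state_nolock() that takes
 * and releases kbdev->pm.power_change_lock for callers that do not already
 * hold it. */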
void kbase_pm_update_cores_state(kbase_device *kbdev)
{
	unsigned long flags;

	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);

	kbase_pm_update_cores_state_nolock(kbdev);

	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
}
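
/* Undo a previous kbase_pm_request_cores() call for cores that will not be
 * used after all: decrement the shader_needed count for each core in
 * shader_cores (and tiler_needed_cnt if tiler_required), and re-evaluate the
 * desired core state if any core is no longer needed. Takes
 * kbdev->pm.power_change_lock internally. */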
void kbase_pm_unrequest_cores(kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores)
{
	unsigned long flags;

	kbase_pm_change_state change_gpu_state = 0u;

	KBASE_DEBUG_ASSERT(kbdev != NULL);

	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);

	while (shader_cores) {
		int bitnum = fls64(shader_cores) - 1;
		u64 bit = 1ULL << bitnum;
		int cnt;

		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);

		cnt = --kbdev->shader_needed_cnt[bitnum];

		if (0 == cnt) {
			kbdev->shader_needed_bitmap &= ~bit;

			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
		}

		shader_cores &= ~bit;
	}

	if (tiler_required != MALI_FALSE) {
		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);

		--kbdev->tiler_needed_cnt;

		/* Whilst tiler jobs must not allow core 0 to be turned off, we don't need to make an
		 * extra call to kbase_pm_update_cores_state_nolock() to ensure core 0 is turned off
		 * when the last tiler job unrequests cores: kbase_js_choose_affinity() ensures core 0
		 * was originally requested for tiler jobs. Hence when there's only a tiler job in the
		 * system, this will still cause kbase_pm_update_cores_state_nolock() to be called. */
	}

	if (change_gpu_state) {
		KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL, NULL, 0u, (u32) kbdev->shader_needed_bitmap);

		kbase_pm_update_cores_state_nolock(kbdev);

		/* Trace that any state change effectively completes immediately -
		 * no-one will wait on the state change */
		kbase_pm_trace_check_and_finish_state_change(kbdev);
	}

	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
}
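
/* Release cores that were previously marked as in use: decrement the
 * shader_inuse count for each core in shader_cores (and tiler_inuse_cnt if
 * tiler_required), and re-evaluate the desired core state when a core's count
 * drops to zero so that it can be powered down. Takes
 * kbdev->pm.power_change_lock internally. */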
void kbase_pm_release_cores(kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores)
{
	unsigned long flags;
	kbase_pm_change_state change_gpu_state = 0u;

	KBASE_DEBUG_ASSERT(kbdev != NULL);

	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);

	while (shader_cores) {
		int bitnum = fls64(shader_cores) - 1;
		u64 bit = 1ULL << bitnum;
		int cnt;

		KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);

		cnt = --kbdev->shader_inuse_cnt[bitnum];

		if (0 == cnt) {
			kbdev->shader_inuse_bitmap &= ~bit;
			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
		}

		shader_cores &= ~bit;
	}

	if (tiler_required != MALI_FALSE) {
		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);

		--kbdev->tiler_inuse_cnt;

		/* Whilst tiler jobs must not allow core 0 to be turned off, we don't need to make an
		 * extra call to kbase_pm_update_cores_state_nolock() to ensure core 0 is turned off
		 * when the last tiler job finishes: kbase_js_choose_affinity() ensures core 0 was
		 * originally requested for tiler jobs. Hence when there's only a tiler job in the
		 * system, this will still cause kbase_pm_update_cores_state_nolock() to be called. */
	}

	if (change_gpu_state) {
		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL, NULL, 0u, (u32) kbdev->shader_inuse_bitmap);

		kbase_timeline_pm_cores_func(kbdev, KBASE_PM_FUNC_ID_RELEASE_CORES_START, change_gpu_state);
		kbase_pm_update_cores_state_nolock(kbdev);
		kbase_timeline_pm_cores_func(kbdev, KBASE_PM_FUNC_ID_RELEASE_CORES_END, change_gpu_state);

		/* Trace that any state change completed immediately */
		kbase_pm_trace_check_and_finish_state_change(kbdev);
	}

	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
}
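
/* Change the core availability policy. The current policy is removed under
 * power_change_lock to prevent IRQ handlers from using it while it is being
 * swapped, the old policy is terminated and the new one initialised, and any
 * deferred core state changes are then re-tried. A temporary PM active
 * reference is held across the switch so the GPU cannot be suspended; the
 * caller must be able to sleep, since kbdev->pm.lock is taken. */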
void kbase_pm_ca_set_policy(struct kbase_device *kbdev, const struct kbase_pm_ca_policy *new_policy)
{
	const struct kbase_pm_ca_policy *old_policy;
	unsigned long flags;

	KBASE_DEBUG_ASSERT(kbdev != NULL);
	KBASE_DEBUG_ASSERT(new_policy != NULL);

	KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u, new_policy->id);

	/* During a policy change we pretend the GPU is active */
	/* A suspend won't happen here, because we're in a syscall from a
	 * userspace thread */
	kbase_pm_context_active(kbdev);

	mutex_lock(&kbdev->pm.lock);

	/* Remove the policy to prevent IRQ handlers from working on it */
	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
	old_policy = kbdev->pm.backend.ca_current_policy;
	kbdev->pm.backend.ca_current_policy = NULL;
	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);

	if (old_policy->term)
		old_policy->term(kbdev);

	if (new_policy->init)
		new_policy->init(kbdev);

	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
	kbdev->pm.backend.ca_current_policy = new_policy;

	/* If any core power state changes were previously attempted, but
	 * couldn't be made because the policy was changing (ca_current_policy
	 * was NULL), then re-try them here. */
	kbase_pm_update_cores_state_nolock(kbdev);

	kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
			kbdev->shader_ready_bitmap,
			kbdev->shader_transitioning_bitmap);

	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);

	mutex_unlock(&kbdev->pm.lock);

	/* Now the policy change is finished, we release our fake context active
	 * reference */
	kbase_pm_context_idle(kbdev);
}
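
/* Release a reference on the L2 caches taken by kbase_pm_request_l2_caches().
 * When the last user goes away, the desired core state is re-evaluated so the
 * L2 can be powered down. */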
void kbase_pm_release_l2_caches(kbase_device *kbdev)
{
	unsigned long flags;

	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);

	KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);

	--kbdev->l2_users_count;

	if (!kbdev->l2_users_count) {
		kbase_pm_update_cores_state_nolock(kbdev);
		/* Trace that any state change completed immediately */
		kbase_pm_trace_check_and_finish_state_change(kbdev);
	}

	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
}
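
/* Request that the L2 caches be powered. Increments l2_users_count and, for
 * the first user, triggers a core state update, then blocks until the L2 is
 * reported powered (kbdev->pm.l2_powered). Must be called from a context that
 * can sleep, and must be balanced by a call to kbase_pm_release_l2_caches(). */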
void kbase_pm_request_l2_caches(kbase_device *kbdev)
{
	unsigned long flags;
	u32 prior_l2_users_count;

	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);

	prior_l2_users_count = kbdev->l2_users_count++;

	KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);

	if (!prior_l2_users_count)
		kbase_pm_update_cores_state_nolock(kbdev);

	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
	wait_event(kbdev->pm.l2_powered_wait, kbdev->pm.l2_powered == 1);

	/* Trace that any state change completed immediately */
	kbase_pm_trace_check_and_finish_state_change(kbdev);
}