/**
 * Submit the 8401 workaround job.
 *
 * Important for BASE_HW_ISSUE_8987: this job always uses 16 RMUs.
 * - Therefore, on slot[1] it will always use the same number of RMUs as
 *   another GLES job.
 * - On slot[2], no other job (GLES or otherwise) will be running on the
 *   cores, by virtue of it being slot[2]. Therefore, any value of RMUs is
 *   acceptable.
 */
void kbasep_8401_submit_dummy_job(kbase_device *kbdev, int js)
{
	u32 cfg;
	mali_addr64 jc;

	/* While this workaround is active we reserve the last address space just for submitting the dummy jobs */
	int as = kbdev->nr_hw_address_spaces;

	/* Don't issue compute jobs on job slot 0 */
	OSK_ASSERT(js != 0);
	OSK_ASSERT(js < KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT);

	/* Job chain GPU address */
	jc = (js + WORKAROUND_PAGE_OFFSET) * OSK_PAGE_SIZE; /* GPU phys address (see the kbase_mmu_insert_pages call in kbasep_8401_workaround_init) */

	/* Clear the job status words which may contain values from a previous job completion */
	memset(kbdev->workaround_compute_job_va[js], 0,  4*sizeof(u32));

	/* Get the affinity of the previous job */
	dummy_job_atom[js].affinity = ((u64)kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_AFFINITY_LO), NULL)) |
	                              (((u64)kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_AFFINITY_HI), NULL)) << 32);

	/* Don't submit a compute job if the affinity was previously zero (i.e. no jobs have run yet on this slot) */
	if(!dummy_job_atom[js].affinity)
	{
		return;
	}

	/* Ensure that our page tables are programmed into the MMU */
	kbase_reg_write(kbdev, MMU_AS_REG(as, ASn_TRANSTAB_LO),
	                       (kbdev->workaround_kctx->pgd & ASn_TRANSTAB_ADDR_SPACE_MASK) | ASn_TRANSTAB_READ_INNER
	                       | ASn_TRANSTAB_ADRMODE_TABLE, NULL);

	kbase_reg_write(kbdev, MMU_AS_REG(as, ASn_TRANSTAB_HI), (kbdev->workaround_kctx->pgd >> 32), NULL);

	kbase_reg_write(kbdev, MMU_AS_REG(as, ASn_MEMATTR_LO), ASn_MEMATTR_IMPL_DEF_CACHE_POLICY, NULL);
	kbase_reg_write(kbdev, MMU_AS_REG(as, ASn_MEMATTR_HI), ASn_MEMATTR_IMPL_DEF_CACHE_POLICY, NULL);
	kbase_reg_write(kbdev, MMU_AS_REG(as, ASn_COMMAND), ASn_COMMAND_UPDATE, NULL);

	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_LO), jc & 0xFFFFFFFF, NULL);
	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_HI), jc >> 32, NULL);

	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_AFFINITY_NEXT_LO), dummy_job_atom[js].affinity & 0xFFFFFFFF, NULL);
	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_AFFINITY_NEXT_HI), dummy_job_atom[js].affinity >> 32, NULL);

	/* start MMU, medium priority, cache clean/flush on end, clean/flush on start */
	cfg = as | JSn_CONFIG_END_FLUSH_CLEAN_INVALIDATE | JSn_CONFIG_START_MMU
	         | JSn_CONFIG_START_FLUSH_CLEAN_INVALIDATE | JSn_CONFIG_THREAD_PRI(8);
	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_CONFIG_NEXT), cfg, NULL);

	KBASE_TRACE_ADD_SLOT( kbdev, JM_SUBMIT, NULL, 0, jc, js );

	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_COMMAND_NEXT), JSn_COMMAND_START, NULL);
	/* Report that the job has been submitted */
	kbasep_jm_enqueue_submit_slot(&kbdev->jm_slots[js], &dummy_job_atom[js]);
}
void kbase_wait_write_flush(struct kbase_context *kctx)
{
	u32 base_count = 0;

	/* A suspend won't happen here, because we're in a syscall from a
	 * userspace thread */

	kbase_pm_context_active(kctx->kbdev);
	kbase_pm_request_gpu_cycle_counter(kctx->kbdev);

	while (true) {
		u32 new_count;

		new_count = kbase_reg_read(kctx->kbdev,
					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
		/* First time around, just store the count. */
		if (base_count == 0) {
			base_count = new_count;
			continue;
		}

		/* No need to handle wrapping, unsigned maths works for this. */
		if ((new_count - base_count) > 1000)
			break;
	}

	kbase_pm_release_gpu_cycle_counter(kctx->kbdev);
	kbase_pm_context_idle(kctx->kbdev);
}
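/*
 * A minimal sketch (not part of the driver) of why the unsigned delta above
 * needs no explicit wrap handling: u32 subtraction is defined modulo 2^32,
 * so the elapsed count stays correct across a counter wrap. The helper and
 * the example values below are hypothetical.
 */
static u32 elapsed_cycles(u32 base_count, u32 new_count)
{
	/* Modulo-2^32 arithmetic: correct even when new_count has wrapped */
	return new_count - base_count;
}

/*
 * Example: base_count = 0xFFFFFF00 (just before wrapping) and
 * new_count = 0x00000200 (just after wrapping) give
 * elapsed_cycles(0xFFFFFF00, 0x00000200) == 0x300, i.e. 768 cycles.
 */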
/**
 * @brief Issue Cache Clean & Invalidate command to hardware
 */
static void kbasep_instr_hwcnt_cacheclean(kbase_device *kbdev)
{
	unsigned long flags;
	unsigned long pm_flags;
	u32 irq_mask;

	KBASE_DEBUG_ASSERT(NULL != kbdev);

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
	/* Wait for any reset to complete */
	while (kbdev->hwcnt.state == KBASE_INSTR_STATE_RESETTING) {
		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
		wait_event(kbdev->hwcnt.cache_clean_wait,
		           kbdev->hwcnt.state != KBASE_INSTR_STATE_RESETTING);
		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
	}
	KBASE_DEBUG_ASSERT(kbdev->hwcnt.state == KBASE_INSTR_STATE_REQUEST_CLEAN);

	/* Enable interrupt */
	spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | CLEAN_CACHES_COMPLETED, NULL);
	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);

	/* Clean & invalidate the caches so we're sure the MMU tables for the dump buffer are valid */
	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CLEAN_INV_CACHES, NULL);
	kbdev->hwcnt.state = KBASE_INSTR_STATE_CLEANING;

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
}
/**
 * @brief Cache clean interrupt received
 */
void kbase_clean_caches_done(kbase_device *kbdev)
{
	u32 irq_mask;

	if (kbdev->hwcnt.state != KBASE_INSTR_STATE_DISABLED) {
		unsigned long flags;
		unsigned long pm_flags;
		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
		/* Disable interrupt */
		spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);

		/* Wakeup... */
		if (kbdev->hwcnt.state == KBASE_INSTR_STATE_CLEANING) {
			/* Only wake if we weren't resetting */
			kbdev->hwcnt.state = KBASE_INSTR_STATE_CLEANED;
			wake_up(&kbdev->hwcnt.cache_clean_wait);
		}
		/* NOTE: In the state KBASE_INSTR_STATE_RESETTING, we're in a reset
		 * and the instrumentation state hasn't been restored yet -
		 * kbasep_reset_timeout_worker() will do the rest of the work */

		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
	}
}
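/*
 * Hedged sketch (hypothetical helper, not a kbase API) of the paired
 * read-modify-write used above and in kbasep_instr_hwcnt_cacheclean() to
 * enable or disable a single GPU IRQ source while holding
 * pm.power_change_lock.
 */
static void kbasep_gpu_irq_source_set(kbase_device *kbdev, u32 bit, int enable)
{
	unsigned long pm_flags;
	u32 irq_mask;

	spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
	if (enable)
		irq_mask |= bit;
	else
		irq_mask &= ~bit;
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask, NULL);
	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
}

/* e.g. kbasep_gpu_irq_source_set(kbdev, CLEAN_CACHES_COMPLETED, 1); */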
Example #5
static void mmu_mask_reenable(kbase_device * kbdev, kbase_context *kctx, kbase_as * as)
{
	u32 mask;
	osk_spinlock_irq_lock(&kbdev->mmu_mask_change);
	mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx);
	mask |= ((1UL << as->number) | (1UL << (MMU_REGS_BUS_ERROR_FLAG(as->number))));
	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), mask, kctx);
	osk_spinlock_irq_unlock(&kbdev->mmu_mask_change);
}
Example #6
static void mmu_mask_reenable(kbase_device * kbdev, kbase_context *kctx, kbase_as * as)
{
	unsigned long flags;
	u32 mask;
	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
	mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx);
	mask |= ((1UL << as->number) | (1UL << (MMU_REGS_BUS_ERROR_FLAG(as->number))));
	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), mask, kctx);
	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
}
bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx)
{
	int offset = 0;

	if (kctx->reg_dump == NULL)
		return false;

	while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) {
		kctx->reg_dump[offset+1] =
				kbase_reg_read(kctx->kbdev,
						kctx->reg_dump[offset], NULL);
		offset += 2;
	}
	return true;
}
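/*
 * Hedged sketch (hypothetical helper, not driver code) showing how a snapshot
 * captured in the reg_dump layout above can be walked: even slots hold
 * register offsets, odd slots hold the values read, and the list ends with
 * REGISTER_DUMP_TERMINATION_FLAG in an offset slot.
 */
static void kbase_job_fault_print_reg_snapshot(struct kbase_context *kctx)
{
	int offset = 0;

	if (kctx->reg_dump == NULL)
		return;

	while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) {
		pr_info("reg 0x%08x = 0x%08x\n",
			kctx->reg_dump[offset], kctx->reg_dump[offset + 1]);
		offset += 2;
	}
}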
void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
				u64 *system_time, struct timespec *ts)
{
	u32 hi1, hi2;

	kbase_pm_request_gpu_cycle_counter(kbdev);

	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
	 * correctly */
	do {
		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
									NULL);
		*cycle_counter = kbase_reg_read(kbdev,
					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
									NULL);
		*cycle_counter |= (((u64) hi1) << 32);
	} while (hi1 != hi2);

	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
	 * correctly */
	do {
		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
									NULL);
		*system_time = kbase_reg_read(kbdev,
					GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
									NULL);
		*system_time |= (((u64) hi1) << 32);
	} while (hi1 != hi2);

	/* Record the CPU's idea of current time */
	getrawmonotonic(ts);

	kbase_pm_release_gpu_cycle_counter(kbdev);
}
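/*
 * The hi/lo/hi sequence above is a general pattern for reading a 64-bit value
 * exposed as two 32-bit registers. The sketch below shows it in isolation;
 * read32() is a hypothetical accessor standing in for kbase_reg_read(), not a
 * kbase API.
 */
static u64 read_split_counter64(u32 (*read32)(int reg), int reg_lo, int reg_hi)
{
	u32 hi1, hi2, lo;

	do {
		hi1 = read32(reg_hi);
		lo = read32(reg_lo);
		hi2 = read32(reg_hi);
		/* If the low word carried into the high word between the two
		 * high reads, hi1 != hi2 and the whole read is retried. */
	} while (hi1 != hi2);

	return (((u64)hi1) << 32) | lo;
}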
static base_jd_event_code kbase_dump_cpu_gpu_time(kbase_jd_atom *katom)
{
	kbase_va_region *reg;
	osk_phy_addr addr;
	u64 pfn;
	u32 offset;
	char *page;
	struct timespec ts;
	base_dump_cpu_gpu_counters data;
	u64 system_time;
	u64 cycle_counter;
	mali_addr64 jc = katom->jc;
	kbase_context *kctx = katom->kctx;

	u32 hi1, hi2;

	memset(&data, 0, sizeof(data));

	kbase_pm_context_active(kctx->kbdev);

	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled correctly */
	do {
		hi1 = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI), NULL);
		cycle_counter = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
		hi2 = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI), NULL);
		cycle_counter |= (((u64)hi1) << 32);
	} while (hi1 != hi2);

	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled correctly */
	do {
		hi1 = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(TIMESTAMP_HI), NULL);
		system_time = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
		hi2 = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(TIMESTAMP_HI), NULL);
		system_time |= (((u64)hi1) << 32);
	} while (hi1 != hi2);

	/* Record the CPU's idea of current time */
	getnstimeofday(&ts);

	kbase_pm_context_idle(kctx->kbdev);

	data.sec = ts.tv_sec;
	data.usec = ts.tv_nsec / 1000;
	data.system_time = system_time;
	data.cycle_counter = cycle_counter;

	pfn = jc >> 12;
	offset = jc & 0xFFF;

	if (offset > 0x1000-sizeof(data))
	{
		/* Wouldn't fit in the page */
		return BASE_JD_EVENT_JOB_CANCELLED;
	}

	reg = kbase_region_tracker_find_region_enclosing_address(kctx, jc);
	if (!reg)
	{
		return BASE_JD_EVENT_JOB_CANCELLED;
	}
	
	if (! (reg->flags & KBASE_REG_GPU_WR) )
	{
		/* Region is not writable by GPU so we won't write to it either */
		return BASE_JD_EVENT_JOB_CANCELLED;
	}

	if (!reg->phy_pages)
	{
		return BASE_JD_EVENT_JOB_CANCELLED;
	}

	addr = reg->phy_pages[pfn - reg->start_pfn];
	if (!addr)
	{
		return BASE_JD_EVENT_JOB_CANCELLED;
	}

	page = osk_kmap(addr);
	if (!page)
	{
		return BASE_JD_EVENT_JOB_CANCELLED;
	}
	memcpy(page+offset, &data, sizeof(data));
	osk_sync_to_cpu(addr+offset, page+offset, sizeof(data));
	osk_kunmap(addr, page);

	return BASE_JD_EVENT_DONE;
}
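/*
 * Worked example (hypothetical only in its concrete numbers) of the
 * pfn/offset split above, which assumes 4 KiB GPU pages: for a job-chain
 * address jc = 0x1234567ABC,
 *   pfn    = jc >> 12   = 0x1234567   (page frame number)
 *   offset = jc & 0xFFF = 0xABC       (byte offset within the page)
 * The offset check then rejects placements where sizeof(data) would not fit
 * in the remaining (0x1000 - offset) bytes of that single page.
 */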
STATIC mali_error kbase_instr_hwcnt_enable_internal(kbase_device *kbdev, kbase_context *kctx, kbase_uk_hwcnt_setup *setup)
{
	unsigned long flags, pm_flags;
	mali_error err = MALI_ERROR_FUNCTION_FAILED;
	kbasep_js_device_data *js_devdata;
	u32 irq_mask;
	int ret;
	u64 shader_cores_needed;

	KBASE_DEBUG_ASSERT(NULL != kctx);
	KBASE_DEBUG_ASSERT(NULL != kbdev);
	KBASE_DEBUG_ASSERT(NULL != setup);
	KBASE_DEBUG_ASSERT(NULL == kbdev->hwcnt.suspended_kctx);

	shader_cores_needed = kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER);

	js_devdata = &kbdev->js_data;

	/* alignment failure */
	if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
		goto out_err;

	/* Override core availability policy to ensure all cores are available */
	kbase_pm_ca_instr_enable(kbdev);

	/* Mark the context as active so the GPU is kept turned on */
	/* A suspend won't happen here, because we're in a syscall from a userspace
	 * thread. */
	kbase_pm_context_active(kbdev);

	/* Request the cores early on synchronously - we'll release them on any errors
	 * (e.g. instrumentation already active) */
	kbase_pm_request_cores_sync(kbdev, MALI_TRUE, shader_cores_needed);

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

	if (kbdev->hwcnt.state == KBASE_INSTR_STATE_RESETTING) {
		/* GPU is being reset */
		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
		wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0);
		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
	}

	if (kbdev->hwcnt.state != KBASE_INSTR_STATE_DISABLED) {
		/* Instrumentation is already enabled */
		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
		goto out_unrequest_cores;
	}

	/* Enable interrupt */
	spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | PRFCNT_SAMPLE_COMPLETED, NULL);
	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);

	/* In use, this context is the owner */
	kbdev->hwcnt.kctx = kctx;
	/* Remember the dump address so we can reprogram it later */
	kbdev->hwcnt.addr = setup->dump_buffer;
	/* Remember all the settings for suspend/resume */
	if (&kbdev->hwcnt.suspended_state != setup)
		memcpy(&kbdev->hwcnt.suspended_state, setup, sizeof(kbdev->hwcnt.suspended_state));

	/* Request the clean */
	kbdev->hwcnt.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
	kbdev->hwcnt.triggered = 0;
	/* Clean & invalidate the caches so we're sure the MMU tables for the dump buffer are valid */
	ret = queue_work(kbdev->hwcnt.cache_clean_wq, &kbdev->hwcnt.cache_clean_work);
	KBASE_DEBUG_ASSERT(ret);

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

	/* Wait for cacheclean to complete */
	wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0);

	KBASE_DEBUG_ASSERT(kbdev->hwcnt.state == KBASE_INSTR_STATE_IDLE);

	/* Schedule the context in */
	kbasep_js_schedule_privileged_ctx(kbdev, kctx);

	/* Configure */
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | PRFCNT_CONFIG_MODE_OFF, kctx);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),     setup->dump_buffer & 0xFFFFFFFF, kctx);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),     setup->dump_buffer >> 32,        kctx);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),       setup->jm_bm,                    kctx);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),   setup->shader_bm,                kctx);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_L3_CACHE_EN), setup->l3_cache_bm,              kctx);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),   setup->mmu_l2_bm,                kctx);
	/* Due to PRLAM-8186 we need to disable the Tiler before we enable the HW counter dump. */
	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0, kctx);
	else
		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), setup->tiler_bm, kctx);

	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | PRFCNT_CONFIG_MODE_MANUAL, kctx);

	/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump */
	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), setup->tiler_bm, kctx);
	
	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

	if (kbdev->hwcnt.state == KBASE_INSTR_STATE_RESETTING) {
		/* GPU is being reset */
		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
		wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0);
		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
	}

	kbdev->hwcnt.state = KBASE_INSTR_STATE_IDLE;
	kbdev->hwcnt.triggered = 1;
	wake_up(&kbdev->hwcnt.wait);

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

	err = MALI_ERROR_NONE;

	dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
	return err;
 out_unrequest_cores:
	kbase_pm_unrequest_cores(kbdev, MALI_TRUE, shader_cores_needed);
	kbase_pm_context_idle(kbdev);
 out_err:
	return err;
}
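/*
 * The dump_buffer check above is the standard power-of-two alignment test:
 * for a power-of-two boundary N, (addr & (N - 1)) is non-zero exactly when
 * addr is not a multiple of N. A minimal sketch with a hypothetical helper:
 */
static int kbasep_is_2048_aligned(u64 addr)
{
	return (addr & (2048 - 1)) == 0;
}

/*
 * e.g. kbasep_is_2048_aligned(0x10000800ULL) == 1,
 *      kbasep_is_2048_aligned(0x10000804ULL) == 0
 */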
/**
 * @brief Disable HW counters collection
 *
 * Note: might sleep, waiting for an ongoing dump to complete
 */
mali_error kbase_instr_hwcnt_disable(kbase_context *kctx)
{
	unsigned long flags, pm_flags;
	mali_error err = MALI_ERROR_FUNCTION_FAILED;
	u32 irq_mask;
	kbase_device *kbdev;

	KBASE_DEBUG_ASSERT(NULL != kctx);
	kbdev = kctx->kbdev;
	KBASE_DEBUG_ASSERT(NULL != kbdev);

	while (1) {
		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

		if (kbdev->hwcnt.state == KBASE_INSTR_STATE_DISABLED) {
			/* Instrumentation is not enabled */
			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
			goto out;
		}

		if (kbdev->hwcnt.kctx != kctx) {
			/* Instrumentation has been setup for another context */
			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
			goto out;
		}

		if (kbdev->hwcnt.state == KBASE_INSTR_STATE_IDLE)
			break;

		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

		/* Ongoing dump/setup - wait for its completion */
		wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0);

	}

	kbdev->hwcnt.state = KBASE_INSTR_STATE_DISABLED;
	kbdev->hwcnt.triggered = 0;

	/* Disable interrupt */
	spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);

	/* Disable the counters */
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);

	kbdev->hwcnt.kctx = NULL;
	kbdev->hwcnt.addr = 0ULL;

	kbase_pm_ca_instr_disable(kbdev);

	kbase_pm_unrequest_cores(kbdev, MALI_TRUE, kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

	/* Release the context. This had its own Power Manager Active reference */
	kbasep_js_release_privileged_ctx(kbdev, kctx);

	/* Also release our Power Manager Active reference */
	kbase_pm_context_idle(kbdev);

	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx);

	err = MALI_ERROR_NONE;

 out:
	return err;
}
Example #12
void kbasep_js_try_schedule_head_ctx( kbase_device *kbdev )
{
	kbasep_js_device_data *js_devdata;
	mali_bool has_kctx;
	kbase_context *head_kctx;
	kbasep_js_kctx_info *js_kctx_info;
	mali_bool is_runpool_full;

	OSK_ASSERT( kbdev != NULL );

	js_devdata = &kbdev->js_data;

	/* Make a speculative check on the Run Pool - this MUST be repeated once
	 * we've obtained a context from the queue and reobtained the Run Pool
	 * lock */
	osk_mutex_lock( &js_devdata->runpool_mutex );
	is_runpool_full = (mali_bool)( js_devdata->nr_contexts_running >= kbdev->nr_address_spaces );
	osk_mutex_unlock( &js_devdata->runpool_mutex );

	if ( is_runpool_full != MALI_FALSE )
	{
		/* No free address spaces - nothing to do */
		return;
	}

	/* Grab the context off head of queue - if there is one */
	osk_mutex_lock( &js_devdata->queue_mutex );
	has_kctx = kbasep_js_policy_dequeue_head_ctx( &js_devdata->policy, &head_kctx );
	osk_mutex_unlock( &js_devdata->queue_mutex );

	if ( has_kctx == MALI_FALSE )
	{
		/* No ctxs to run - nothing to do */
		return;
	}
	js_kctx_info = &head_kctx->jctx.sched_info;

	OSK_PRINT_INFO(OSK_BASE_JM, "JS: Dequeue Context %p", head_kctx );

	/*
	 * Atomic transaction on the Context and Run Pool begins
	 */
	osk_mutex_lock( &js_kctx_info->ctx.jsctx_mutex );
	osk_mutex_lock( &js_devdata->runpool_mutex );

	/* Re-check to see if the Run Pool is full */
	is_runpool_full = (mali_bool)( js_devdata->nr_contexts_running >= kbdev->nr_address_spaces );
	if ( is_runpool_full != MALI_FALSE )
	{
		/* No free address spaces - roll back the transaction so far and return */
		osk_mutex_unlock( &js_devdata->runpool_mutex );

		/* Only requeue if not dying - which might occur through zapping-whilst-scheduling */
		if ( js_kctx_info->ctx.is_dying == MALI_FALSE )
		{
			OSK_PRINT_INFO(OSK_BASE_JM, "JS: Transaction rollback: Requeue Context %p", head_kctx );

			osk_mutex_lock( &js_devdata->queue_mutex );
			kbasep_js_policy_enqueue_ctx( &js_devdata->policy, head_kctx );
			osk_mutex_unlock( &js_devdata->queue_mutex );
		}
		else
		{
			OSK_PRINT_INFO(OSK_BASE_JM, "JS: Transaction rollback: Context %p is dying. Kill remaining jobs and pm-idle ctx", head_kctx );
			OSK_ASSERT( js_kctx_info->ctx.nr_jobs > 0 );
			/* Notify PM that a context has gone idle */
			kbase_pm_context_idle(kbdev);

			/* Kill all the jobs present (call kbase_jd_cancel on all jobs) */
			kbasep_js_policy_kill_all_ctx_jobs( &js_devdata->policy, head_kctx );

			/* Nothing more to be done to kill the context here, kbase_jd_zap_context
			 * waits for all jobs to be cancelled */
		}

		osk_mutex_unlock( &js_kctx_info->ctx.jsctx_mutex );
		return;
	}

	OSK_PRINT_INFO(OSK_BASE_JM, "JS: RunPool Add Context %p", head_kctx );

	/* update book-keeping info */
	js_kctx_info->ctx.is_scheduled = MALI_TRUE;
	++(js_devdata->nr_contexts_running);
	/* Cause any future waiter-on-termination to wait until the context is
	 * descheduled */
	osk_waitq_clear( &js_kctx_info->ctx.not_scheduled_waitq );


	/* Do everything necessary to pick the address space (inc. updating the book-keeping info).
	 * Add the context to the Run Pool, and allow it to run jobs */
	assign_and_activate_kctx_addr_space( kbdev, head_kctx );

	/* Check and setup HW counters dumping */
	osk_spinlock_lock(&kbdev->hwcnt_lock);
	osk_spinlock_irq_lock(&js_devdata->runpool_irq.lock);
	if (head_kctx == kbdev->hwcnt_context &&
		kbdev->hwcnt_is_setup == MALI_FALSE)
	{
		/* Setup the base address */
#if BASE_HW_ISSUE_8186
		u32 val;
		/* Save and clear PRFCNT_TILER_EN */
		val = kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), head_kctx);
		if(0 != val)
		{
			kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0, head_kctx);
		}
		/* Update PRFCNT_CONFIG with TILER_EN = 0 */
		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), (head_kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | PRFCNT_CONFIG_MODE_MANUAL, head_kctx);
		/* Restore PRFCNT_TILER_EN */
		if(0 != val)
		{
			kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),  val, head_kctx);
		}
#else
		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), (head_kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | PRFCNT_CONFIG_MODE_MANUAL, head_kctx);
#endif
		/* Prevent the context from being scheduled out */
		kbasep_js_runpool_retain_ctx_nolock(kbdev, head_kctx);

		kbdev->hwcnt_is_setup = MALI_TRUE;
	}
	osk_spinlock_irq_unlock(&js_devdata->runpool_irq.lock);
	osk_spinlock_unlock(&kbdev->hwcnt_lock);

	/* Try to run the next job, in case this context has jobs that match the
	 * job slot requirements, but none of the other currently running contexts
	 * do */
	kbasep_js_try_run_next_job( kbdev );

	/* Transaction complete */
	osk_mutex_unlock( &js_devdata->runpool_mutex );
	osk_mutex_unlock( &js_kctx_info->ctx.jsctx_mutex );
	/* Note: after this point, the context could potentially get scheduled out immediately */
}
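/*
 * Generic sketch (hypothetical types and names, not kbase API) of the
 * speculative-check/re-check pattern used above: the cheap check avoids the
 * heavier locking in the common "run pool full" case, but must be repeated
 * once all the real locks are held, because the state may have changed in
 * between.
 */
struct pool { struct mutex count_lock; int nr_running; int capacity; };
struct item { struct mutex lock; };

static void try_add_item(struct pool *p, struct item *it)
{
	int full;

	mutex_lock(&p->count_lock);
	full = (p->nr_running >= p->capacity);	/* speculative check */
	mutex_unlock(&p->count_lock);
	if (full)
		return;				/* nothing to do */

	mutex_lock(&it->lock);
	mutex_lock(&p->count_lock);
	if (p->nr_running >= p->capacity) {	/* MUST re-check under the locks */
		mutex_unlock(&p->count_lock);
		mutex_unlock(&it->lock);
		return;				/* roll back the transaction */
	}
	++(p->nr_running);
	/* ... commit the rest of the transaction ... */
	mutex_unlock(&p->count_lock);
	mutex_unlock(&it->lock);
}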
Example #13
static base_jd_event_code kbase_dump_cpu_gpu_time(kbase_context *kctx, mali_addr64 jc)
{
	kbase_va_region *reg;
	osk_phy_addr addr;
	u64 pfn;
	u32 offset;
	char *page;
	osk_timeval tv;
	base_dump_cpu_gpu_counters data;
	u64 system_time;
	u64 cycle_counter;

	u32 hi1, hi2;

	OSK_MEMSET(&data, 0, sizeof(data));

	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled correctly */
	do {
		hi1 = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI), NULL);
		cycle_counter = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
		hi2 = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI), NULL);
		cycle_counter |= (((u64)hi1) << 32);
	} while (hi1 != hi2);

	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled correctly */
	do {
		hi1 = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(TIMESTAMP_HI), NULL);
		system_time = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
		hi2 = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(TIMESTAMP_HI), NULL);
		system_time |= (((u64)hi1) << 32);
	} while (hi1 != hi2);

	/* Record the CPU's idea of current time */
	osk_gettimeofday(&tv);

	data.sec = tv.tv_sec;
	data.usec = tv.tv_usec;
	data.system_time = system_time;
	data.cycle_counter = cycle_counter;

	pfn = jc >> 12;
	offset = jc & 0xFFF;

	if (offset > 0x1000-sizeof(data))
	{
		/* Wouldn't fit in the page */
		return BASE_JD_EVENT_JOB_CANCELLED;
	}

	reg = kbase_region_lookup(kctx, jc);
	if (!reg)
	{
		return BASE_JD_EVENT_JOB_CANCELLED;
	}

	if (! (reg->flags & KBASE_REG_GPU_RW) )
	{
		/* Region is not writable by GPU so we won't write to it either */
		return BASE_JD_EVENT_JOB_CANCELLED;
	}

	if (!reg->phy_pages)
	{
		return BASE_JD_EVENT_JOB_CANCELLED;
	}

	addr = reg->phy_pages[pfn - reg->start_pfn];
	if (!addr)
	{
		return BASE_JD_EVENT_JOB_CANCELLED;
	}

	page = osk_kmap(addr);
	if (!page)
	{
		return BASE_JD_EVENT_JOB_CANCELLED;
	}
	memcpy(page+offset, &data, sizeof(data));
	osk_kunmap(addr, page);

	return BASE_JD_EVENT_DONE;
}
Example #14
static void page_fault_worker(struct work_struct *data)
{
	u64 fault_pfn;
	u32 new_pages;
	u32 fault_rel_pfn;
	kbase_as * faulting_as;
	int as_no;
	kbase_context * kctx;
	kbase_device * kbdev;
	kbase_va_region *region;
	mali_error err;

	u32 fault_status;

	faulting_as = container_of(data, kbase_as, work_pagefault);
	fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
	as_no = faulting_as->number;

	kbdev = container_of( faulting_as, kbase_device, as[as_no] );

	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
	 *
	 * NOTE: NULL can be returned here if we're gracefully handling a spurious interrupt */
	kctx = kbasep_js_runpool_lookup_ctx_noretain( kbdev, as_no );

	if ( kctx == NULL )
	{
		/* Address space has no context, terminate the work */
		u32 reg;
		/* AS transaction begin */
		mutex_lock(&faulting_as->transaction_mutex);
		reg = kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_TRANSTAB_LO), NULL);
		reg = (reg & (~(u32)MMU_TRANSTAB_ADRMODE_MASK)) | ASn_TRANSTAB_ADRMODE_UNMAPPED;
		kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_TRANSTAB_LO), reg, NULL);
		kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_UPDATE, NULL);
		kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), (1UL << as_no), NULL);
		mutex_unlock(&faulting_as->transaction_mutex);
		/* AS transaction end */

		mmu_mask_reenable(kbdev, NULL, faulting_as);
		return;
	}

	fault_status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_FAULTSTATUS), NULL);

	OSK_ASSERT( kctx->kbdev == kbdev );

	kbase_gpu_vm_lock(kctx);

	/* find the region object for this VA */
	region = kbase_region_tracker_find_region_enclosing_address(kctx, faulting_as->fault_addr);
	if (NULL == region || (GROWABLE_FLAGS_REQUIRED != (region->flags & GROWABLE_FLAGS_MASK)))
	{
		kbase_gpu_vm_unlock(kctx);
		/* failed to find the region or mismatch of the flags */
		kbase_mmu_report_fault_and_kill(kctx, faulting_as, faulting_as->fault_addr);
		goto fault_done;
	}

	if ((((fault_status & ASn_FAULTSTATUS_ACCESS_TYPE_MASK) == ASn_FAULTSTATUS_ACCESS_TYPE_READ) &&
	        !(region->flags & KBASE_REG_GPU_RD)) ||
	    (((fault_status & ASn_FAULTSTATUS_ACCESS_TYPE_MASK) == ASn_FAULTSTATUS_ACCESS_TYPE_WRITE) &&
	        !(region->flags & KBASE_REG_GPU_WR)) ||
	    (((fault_status & ASn_FAULTSTATUS_ACCESS_TYPE_MASK) == ASn_FAULTSTATUS_ACCESS_TYPE_EX) &&
	        (region->flags & KBASE_REG_GPU_NX)))
	{
		OSK_PRINT_WARN(OSK_BASE_MMU, "Access permissions don't match: region->flags=0x%x", region->flags);
		kbase_gpu_vm_unlock(kctx);
		kbase_mmu_report_fault_and_kill(kctx, faulting_as, faulting_as->fault_addr);
		goto fault_done;
	}

	/* find the size we need to grow it by */
	/* we know the result fits in a u32 because kbase_region_tracker_find_region_enclosing_address
	 * validates that the fault_address is within a u32 of the start_pfn */
	fault_rel_pfn = fault_pfn - region->start_pfn;
	
	if (fault_rel_pfn < region->nr_alloc_pages)
	{
		OSK_PRINT_WARN(OSK_BASE_MMU, "Fault in allocated region of growable TMEM: Ignoring");
		kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), (1UL << as_no), NULL);
		mmu_mask_reenable(kbdev, kctx, faulting_as);
		kbase_gpu_vm_unlock(kctx);
		goto fault_done;
	}

	new_pages = make_multiple(fault_rel_pfn - region->nr_alloc_pages + 1, region->extent);
	if (new_pages + region->nr_alloc_pages > region->nr_pages)
	{
		/* cap to max vsize */
		new_pages = region->nr_pages - region->nr_alloc_pages;
	}

	if (0 == new_pages)
	{
		/* Duplicate of a fault we've already handled, nothing to do */
		kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), (1UL << as_no), NULL);
		mmu_mask_reenable(kbdev, kctx, faulting_as);
		kbase_gpu_vm_unlock(kctx);
		goto fault_done;
	}

	if (MALI_ERROR_NONE == kbase_alloc_phy_pages_helper(region, new_pages))
	{
		/* alloc success */
		mali_addr64 lock_addr;
		OSK_ASSERT(region->nr_alloc_pages <= region->nr_pages);

		/* AS transaction begin */
		mutex_lock(&faulting_as->transaction_mutex);

		/* Lock the VA region we're about to update */
		lock_addr = lock_region(kbdev, faulting_as->fault_addr >> PAGE_SHIFT, new_pages);
		kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_LOCKADDR_LO), lock_addr & 0xFFFFFFFFUL, kctx);
		kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_LOCKADDR_HI), lock_addr >> 32, kctx);
		kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_LOCK, kctx);

		/* set up the new pages */
		err = kbase_mmu_insert_pages(kctx, region->start_pfn + region->nr_alloc_pages - new_pages,
		                             &region->phy_pages[region->nr_alloc_pages - new_pages],
		                             new_pages, region->flags);
		if(MALI_ERROR_NONE != err)
		{
			/* failed to insert pages, handle as a normal PF */
			mutex_unlock(&faulting_as->transaction_mutex);
			kbase_gpu_vm_unlock(kctx);
			/* The locked VA region will be unlocked and the cache invalidated in here */
			kbase_mmu_report_fault_and_kill(kctx, faulting_as, faulting_as->fault_addr);
			goto fault_done;
		}

#ifdef CONFIG_MALI_GATOR_SUPPORT
		kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
#endif /* CONFIG_MALI_GATOR_SUPPORT */
		/* clear the irq */
		/* MUST BE BEFORE THE FLUSH/UNLOCK */
		kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), (1UL << as_no), NULL);

		/* flush L2 and unlock the VA (resumes the MMU) */
		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
		{
			kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_FLUSH, kctx);
		}
		else
		{
			kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_FLUSH_PT, kctx);
		}

		/* wait for the flush to complete */
		while (kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_STATUS), kctx) & 1);

		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630))
		{
			/* Issue an UNLOCK command to ensure that valid page tables are
			 * re-read by the GPU after an update. The FLUSH command should
			 * perform all the actions necessary; however, the bus logs show
			 * that if multiple page faults occur within an 8-page region the
			 * MMU does not always re-read the updated page table entries for
			 * later faults (or only partially re-reads them), and it then
			 * raises the page fault IRQ again for the same addresses. The
			 * UNLOCK ensures that the MMU cache is flushed, so the updates
			 * can be re-read. As the region is now unlocked, we need to
			 * issue 2 UNLOCK commands in order to flush the MMU/uTLB; see
			 * PRLAM-8812. */
			kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_COMMAND), ASn_COMMAND_UNLOCK, kctx);
			kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_COMMAND), ASn_COMMAND_UNLOCK, kctx);
		}

		mutex_unlock(&faulting_as->transaction_mutex);
		/* AS transaction end */

		/* reenable this in the mask */
		mmu_mask_reenable(kbdev, kctx, faulting_as);
		kbase_gpu_vm_unlock(kctx);
	}
/**
 * @brief Disable HW counters collection
 *
 * Note: might sleep, waiting for an ongoing dump to complete
 */
mali_error kbase_instr_hwcnt_disable_sec(struct kbase_context *kctx)
{
	unsigned long flags, pm_flags;
	mali_error err = MALI_ERROR_FUNCTION_FAILED;
	u32 irq_mask;
	struct kbase_device *kbdev;

	KBASE_DEBUG_ASSERT(NULL != kctx);
	kbdev = kctx->kbdev;
	KBASE_DEBUG_ASSERT(NULL != kbdev);

	/* MALI_SEC 140925 */
	flush_work(&kbdev->hwcnt.cache_clean_work);

	while (1) {
		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

		if (kbdev->hwcnt.state == KBASE_INSTR_STATE_DISABLED) {
			/* Instrumentation is not enabled */
			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
			goto out;
		}

		if (kbdev->hwcnt.kctx != kctx) {
			/* Instrumentation has been setup for another context */
			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
			goto out;
		}

		if (kbdev->hwcnt.state == KBASE_INSTR_STATE_IDLE)
			break;

		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

		/* Ongoing dump/setup - wait for its completion */
		if (wait_event_timeout(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0, kbdev->hwcnt.timeout) == 0)
			kbdev->hwcnt.state = KBASE_INSTR_STATE_IDLE;
	}

	kbdev->hwcnt.state = KBASE_INSTR_STATE_DISABLED;
	kbdev->hwcnt.triggered = 0;

	/* Disable interrupt */
	spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);

	/* Disable the counters */
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);

	kbdev->hwcnt.kctx = NULL;
	kbdev->hwcnt.addr = 0ULL;

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx);

	err = MALI_ERROR_NONE;

 out:

	kbdev->hwcnt.trig_exception = 0;

	return err;
}
Example #16
static void page_fault_worker(osk_workq_work *data)
{
	u64 fault_pfn;
	u32 new_pages;
	u32 fault_rel_pfn;
	kbase_as * faulting_as;
	int as_no;
	kbase_context * kctx;
	kbase_device * kbdev;
	kbase_va_region *region;
	mali_error err;

	faulting_as = CONTAINER_OF(data, kbase_as, work_pagefault);
	fault_pfn = faulting_as->fault_addr >> OSK_PAGE_SHIFT;
	as_no = faulting_as->number;

	kbdev = CONTAINER_OF( faulting_as, kbase_device, as[as_no] );

	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
	 *
	 * NOTE: NULL can be returned here if we're gracefully handling a spurious interrupt */
	kctx = kbasep_js_runpool_lookup_ctx_noretain( kbdev, as_no );

	if ( kctx == NULL )
	{
		/* Address space has no context, terminate the work */
		u32 reg;
		/* AS transaction begin */
		osk_mutex_lock(&faulting_as->transaction_mutex);
		reg = kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_TRANSTAB_LO), NULL);
		reg = (reg & (~(u32)MMU_TRANSTAB_ADRMODE_MASK)) | ASn_TRANSTAB_ADRMODE_UNMAPPED;
		kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_TRANSTAB_LO), reg, NULL);
		kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_UPDATE, NULL);
		kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), (1UL << as_no), NULL);
		osk_mutex_unlock(&faulting_as->transaction_mutex);
		/* AS transaction end */

		mmu_mask_reenable(kbdev, NULL, faulting_as);
		return;
	}


	OSK_ASSERT( kctx->kbdev == kbdev );

	kbase_gpu_vm_lock(kctx);

	/* find the region object for this VA */
	region = kbase_region_lookup(kctx, faulting_as->fault_addr);
	if (NULL == region || (GROWABLE_FLAGS_REQUIRED != (region->flags & GROWABLE_FLAGS_MASK)))
	{
		kbase_gpu_vm_unlock(kctx);
		/* failed to find the region or mismatch of the flags */
		kbase_mmu_report_fault_and_kill(kctx, faulting_as, faulting_as->fault_addr);
		goto fault_done;
	}

	/* find the size we need to grow it by */
	/* we know the result fits in a u32 because kbase_region_lookup
	 * validates that the fault_address is within a u32 of the start_pfn */
	fault_rel_pfn = fault_pfn - region->start_pfn;
	new_pages = make_multiple(fault_rel_pfn - region->nr_alloc_pages + 1, region->extent);
	if (new_pages + region->nr_alloc_pages > region->nr_pages)
	{
		/* cap to max vsize */
		new_pages = region->nr_pages - region->nr_alloc_pages;
	}

	if (0 == new_pages)
	{
		/* Duplicate of a fault we've already handled, nothing to do */
		kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), (1UL << as_no), NULL);
		mmu_mask_reenable(kbdev, kctx, faulting_as);
		kbase_gpu_vm_unlock(kctx);
		goto fault_done;
	}

	if (MALI_ERROR_NONE == kbase_alloc_phy_pages_helper(region, new_pages))
	{
		/* alloc success */
		mali_addr64 lock_addr;
		OSK_ASSERT(region->nr_alloc_pages <= region->nr_pages);

		/* AS transaction begin */
		osk_mutex_lock(&faulting_as->transaction_mutex);

		/* Lock the VA region we're about to update */
		lock_addr = lock_region(faulting_as->fault_addr >> OSK_PAGE_SHIFT, new_pages);
		kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_LOCKADDR_LO), lock_addr & 0xFFFFFFFFUL, kctx);
		kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_LOCKADDR_HI), lock_addr >> 32, kctx);
		kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_LOCK, kctx);

		/* set up the new pages */
		err = kbase_mmu_insert_pages(kctx, region->start_pfn + region->nr_alloc_pages - new_pages,
		                             &region->phy_pages[region->nr_alloc_pages - new_pages],
		                             new_pages, region->flags);
		if(MALI_ERROR_NONE != err)
		{
			/* failed to insert pages, handle as a normal PF */
			osk_mutex_unlock(&faulting_as->transaction_mutex);
			kbase_gpu_vm_unlock(kctx);
			/* The locked VA region will be unlocked and the cache invalidated in here */
			kbase_mmu_report_fault_and_kill(kctx, faulting_as, faulting_as->fault_addr);
			goto fault_done;
		}

		/* clear the irq */
		/* MUST BE BEFORE THE FLUSH/UNLOCK */
		kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), (1UL << as_no), NULL);

		/* flush L2 and unlock the VA (resumes the MMU) */
		kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_FLUSH, kctx);

		/* wait for the flush to complete */
		while (kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_STATUS), kctx) & 1);

		osk_mutex_unlock(&faulting_as->transaction_mutex);
		/* AS transaction end */

		/* reenable this in the mask */
		mmu_mask_reenable(kbdev, kctx, faulting_as);
		kbase_gpu_vm_unlock(kctx);
	}
void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
					struct kbase_gpuprops_regdump *regdump)
{
	int i;

	/* Fill regdump with the content of the relevant registers */
	regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);

	regdump->l2_features = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(L2_FEATURES), NULL);
	regdump->suspend_size = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(SUSPEND_SIZE), NULL);
	regdump->tiler_features = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(TILER_FEATURES), NULL);
	regdump->mem_features = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(MEM_FEATURES), NULL);
	regdump->mmu_features = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(MMU_FEATURES), NULL);
	regdump->as_present = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(AS_PRESENT), NULL);
	regdump->js_present = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(JS_PRESENT), NULL);

	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
		regdump->js_features[i] = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL);

	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
		regdump->texture_features[i] = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL);

	regdump->thread_max_threads = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL);
	regdump->thread_max_workgroup_size = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE),
									NULL);
	regdump->thread_max_barrier_size = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL);
	regdump->thread_features = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(THREAD_FEATURES), NULL);

	regdump->shader_present_lo = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL);
	regdump->shader_present_hi = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL);

	regdump->tiler_present_lo = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(TILER_PRESENT_LO), NULL);
	regdump->tiler_present_hi = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(TILER_PRESENT_HI), NULL);

	regdump->l2_present_lo = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(L2_PRESENT_LO), NULL);
	regdump->l2_present_hi = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(L2_PRESENT_HI), NULL);
}