void base_dep_release(base_context *ctx, base_jd_dep *dep)
{
	CDBG_ASSERT_POINTER(ctx);
	CDBG_ASSERT_POINTER(dep);

	osu_mutex_lock(&ctx->jd_context.lock);
	base_sem_release(ctx, dep->dep[0]);
	base_sem_release(ctx, dep->dep[1]);
	osu_mutex_unlock(&ctx->jd_context.lock);
}
mali_error base_context_hwcnt_enable(base_context* base_ctx, mali_addr64 dump_addr, u32 jm_bm, u32 shader_bm, u32 tiler_bm, u32 l3_cache_bm, u32 mmu_l2_bm)
{
	mali_error err;
	kbase_uk_hwcnt_setup hwcnt_setup;

	CDBG_ASSERT_POINTER(base_ctx);
	CDBG_ASSERT(0 == (dump_addr & (2048-1))); /* dump buffer must be 2048-byte aligned */

	hwcnt_setup.header.id = KBASE_FUNC_HWCNT_SETUP;
	hwcnt_setup.dump_buffer = dump_addr;
	hwcnt_setup.jm_bm = jm_bm;
	hwcnt_setup.shader_bm = shader_bm;
	hwcnt_setup.tiler_bm = tiler_bm;
	hwcnt_setup.l3_cache_bm = l3_cache_bm;
	hwcnt_setup.mmu_l2_bm = mmu_l2_bm;

	err = uku_call(&base_ctx->uk_ctx, &hwcnt_setup, sizeof(hwcnt_setup));

	if (MALI_ERROR_NONE == err)
	{
		err = hwcnt_setup.header.ret;
	}

	return err;
}
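/*
 * Usage sketch (illustrative only, not part of the library): a typical
 * counter session pairs the enable call above with base_context_hwcnt_dump()
 * and base_context_hwcnt_disable(), defined later in this file. The GPU dump
 * address and bitmask variables are assumed to have been prepared by the
 * caller.
 *
 *   mali_error err;
 *   err = base_context_hwcnt_enable(ctx, dump_gpu_va, jm_bm, shader_bm,
 *                                   tiler_bm, l3_bm, mmu_l2_bm);
 *   if (MALI_ERROR_NONE == err)
 *   {
 *       err = base_context_hwcnt_dump(ctx);
 *       base_context_hwcnt_disable(ctx);
 *   }
 */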
void base_uk_ctx_terminate(base_context * base_ctx)
{
	CDBG_ASSERT_POINTER(base_ctx);

	basep_unmap_ringbuffer(base_ctx);
	uku_close(&base_ctx->uk_ctx);
}
u32 basep_gpu_props_get_num_coherent_groups( base_context *ctx )
{
    CDBG_ASSERT_POINTER( ctx );

    UNUSED( ctx );

    switch ( BASEP_DUMMY_GPU_CORES )
    {
    case 1:
    case 2:
    case 3:
    case 4:
        return 1;

    case 6:
    case 8:
        return 2;

    default:
        CDBG_ASSERT_MSG( MALI_FALSE, "Unreachable\n" );
        break;
    }

    return 0;
}
void base_jd_submit_bag(base_context *base_ctx, const base_jd_bag *bag)
{
	mali_error err;
	kbase_uk_job_submit ukbag;

	CDBG_ASSERT_POINTER(base_ctx);
	CDBG_ASSERT(MALI_FALSE == base_ctx->dispatch_disabled);
	CDBG_ASSERT_MSG( 0u == bag->core_restriction, "Use of core_restriction is deprecated" );

	ukbag.header.id = KBASE_FUNC_JOB_SUBMIT;

	ukbag.bag_uaddr = (uintptr_t)bag;
	ukbag.core_restriction = 0u;
	ukbag.offset = bag->offset;
	ukbag.size = bag->size;
	ukbag.nr_atoms = bag->nr_atoms;

	err = uku_call(&base_ctx->uk_ctx, &ukbag, sizeof(ukbag));

	CDBG_ASSERT_MSG(
		err == MALI_ERROR_NONE && ukbag.header.ret == MALI_ERROR_NONE,
		"bag submission error swallowed (%d, %d)", err, ukbag.header.ret
		);
		
	return;
}
base_tmem_handle base_tmem_from_ump(base_context *ctx, ump_secure_id ump_id, u64 * const pages)
{
	mali_error err;

	kbase_uk_tmem_from_ump tmem_ump;

	CDBG_ASSERT_POINTER(ctx);

	tmem_ump.header.id = KBASE_FUNC_TMEM_FROM_UMP;
	tmem_ump.id = ump_id;

	err = uku_call(&ctx->uk_ctx, &tmem_ump, sizeof(tmem_ump));
	if (MALI_ERROR_NONE == err)
	{
		err = tmem_ump.header.ret;
		if (MALI_ERROR_NONE == err)
		{
			if (NULL != pages)
				*pages = tmem_ump.pages;
			
			return tmem_ump.gpu_addr;
		}
	}

	return 0ULL;
}
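/*
 * Usage sketch (illustrative): importing a UMP buffer and checking the
 * result. The ump_id value is assumed to come from the UMP API; a return
 * value of 0 indicates failure, as in the function above.
 *
 *   u64 nr_pages;
 *   base_tmem_handle handle = base_tmem_from_ump(ctx, ump_id, &nr_pages);
 *   if (0ULL == handle)
 *   {
 *       return MALI_ERROR_FUNCTION_FAILED;
 *   }
 */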
void base_bag_release_buffer(base_context *ctx, base_jd_bag *bag)
{
	int is_first;
	base_jd_bag *tmp;
	base_jd_bag *next;
	
	CDBG_ASSERT_POINTER(ctx);

	osu_mutex_lock(&ctx->jd_context.lock);

	tmp = CUTILS_DLIST_FRONT(&ctx->jd_context.bag_list, base_jd_bag, entry);
	if (bag == tmp)
		is_first = 1;
	else
		is_first = 0;

	next = CUTILS_DLIST_REMOVE_AND_RETURN_NEXT(&ctx->jd_context.bag_list,
						   bag, base_jd_bag, entry);

	if (CUTILS_DLIST_IS_EMPTY(&ctx->jd_context.bag_list)) {
		ctx->jd_context.head = ctx->jd_context.tail;
	} else if (is_first) {
		ctx->jd_context.head = next->offset;
	}
	
	osu_mutex_unlock(&ctx->jd_context.lock);
}
void base_tmem_free(base_context * base_ctx, base_tmem_handle handle)
{
	mali_error err;
	CDBG_ASSERT_POINTER(base_ctx);

	err = basep_do_mem_free(&base_ctx->uk_ctx, handle);
	CDBG_ASSERT(MALI_ERROR_NONE == err);
}
void base_jd_event_term(base_context *base_ctx)
{
	CDBG_ASSERT_POINTER(base_ctx);

	base_ctx->dispatch_disabled = MALI_TRUE;
	post_term(&base_ctx->uk_ctx);
	/* wait until the termination event has been received */
	osu_sem_wait(&base_ctx->shutdown_sem, 0);
	osu_sem_term(&base_ctx->shutdown_sem);
}
void base_context_reg_trace_disable(base_context * ctx)
{
	CDBG_ASSERT_POINTER(ctx);
	CDBG_ASSERT(NULL != ctx->jd_context.tb);
	if (NULL == ctx->jd_context.tb)
	{
		/* ignore the unmatched disable call */
		return;
	}
	basep_unmap_trace_buffer(ctx);
}
mali_error base_context_reg_trace_enable(base_context *ctx, size_t trace_buffer_size, void** mapping)
{
	mali_error err;

	CDBG_ASSERT_POINTER(ctx);
	CDBG_ASSERT_POINTER(mapping);
	CDBG_ASSERT(NULL == ctx->jd_context.tb);

	err = basep_map_trace_buffer(ctx, trace_buffer_size);

	if (MALI_ERROR_NONE != err)
	{
		return err;
	}

	*mapping = ctx->jd_context.tb;

	return MALI_ERROR_NONE;

}
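/*
 * Usage sketch (illustrative): register trace enable/disable are expected to
 * be used as a matched pair; the mapping returned here is only valid until
 * base_context_reg_trace_disable() unmaps the trace buffer. tb_size is a
 * caller-chosen buffer size and an assumption of this sketch.
 *
 *   void *tb;
 *   if (MALI_ERROR_NONE == base_context_reg_trace_enable(ctx, tb_size, &tb))
 *   {
 *       ... read trace entries through tb ...
 *       base_context_reg_trace_disable(ctx);
 *   }
 */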
void base_context_hwcnt_disable(base_context * base_ctx)
{
	kbase_uk_hwcnt_setup hwcnt_setup;
	mali_error err;

	CDBG_ASSERT_POINTER(base_ctx);
	hwcnt_setup.header.id = KBASE_FUNC_HWCNT_SETUP;
	/* a zero dump buffer address requests that counter collection be disabled */
	hwcnt_setup.dump_buffer = 0ULL;
	err = uku_call(&base_ctx->uk_ctx, &hwcnt_setup, sizeof(hwcnt_setup));
	CDBG_ASSERT_MSG(
		err == MALI_ERROR_NONE && hwcnt_setup.header.ret == MALI_ERROR_NONE, 
		"disabling hw counter failed; swallowing error (%d, %d)", err, hwcnt_setup.header.ret
		);
}
mali_error basep_fill_gpu_props( struct mali_base_gpu_props *const gpu_props,
                                 u32 props_size,
                                 base_context *ctx )
{
    mali_error err;

    CDBG_ASSERT_POINTER( gpu_props );
    CDBG_ASSERT_POINTER( ctx );


    /* Copy everything but the coherency information */
    STDLIB_MEMCPY( gpu_props, &basep_gpu_props_template, offsetof(struct mali_base_gpu_props, coherency_info) );

    /* Modify members that need an update */
    gpu_props->core_props.gpu_available_memory_size = get_available_gpu_memory( ctx );

    /* Construct the Coherency information */

    err = basep_gpu_props_construct_coherency( &gpu_props->coherency_info,
            props_size-offsetof(struct mali_base_gpu_props, coherency_info),
            ctx );
    return err;
}
mali_error base_dep_acquire(base_context *ctx, base_jd_dep *dep, int nr)
{
	mali_error ret = MALI_ERROR_NONE;
	int i;

	CDBG_ASSERT_POINTER(ctx);
	CDBG_ASSERT_POINTER(dep);
	CDBG_ASSERT((nr == 1) || (nr == 2));

	/* Please turn this into spin/cheap locks... */
	osu_mutex_lock(&ctx->jd_context.lock);

	for (i = 0; i < nr; i++) {
		dep->dep[i] = base_sem_acquire(ctx);
		if (!dep->dep[i])
			break;
	}

	if (i < nr) {
		if (1 == i)
		{
			base_sem_release(ctx, dep->dep[0]);
		}
		ret = MALI_ERROR_FUNCTION_FAILED;
	}

	/*
	 * If we were asked for a single dependency, make sure we wipe
	 * the second one. It helps keep the release code simple.
	 */
	if (nr == 1)
		dep->dep[1] = 0;

	osu_mutex_unlock(&ctx->jd_context.lock);
	return ret;
}
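/*
 * Usage sketch (illustrative): base_dep_acquire() is balanced by
 * base_dep_release() (at the top of this file), which frees both semaphore
 * slots; unused slots are zeroed by the acquire path so the release can be
 * unconditional.
 *
 *   base_jd_dep dep;
 *   if (MALI_ERROR_NONE == base_dep_acquire(ctx, &dep, 1))
 *   {
 *       ... use dep.dep[0] in an atom ...
 *       base_dep_release(ctx, &dep);
 *   }
 */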
mali_error base_context_hwcnt_dump(base_context * base_ctx)
{
	mali_error err;
	kbase_uk_hwcnt_dump dump;

	CDBG_ASSERT_POINTER(base_ctx);

	dump.header.id = KBASE_FUNC_HWCNT_DUMP;
	err = uku_call(&base_ctx->uk_ctx, &dump, sizeof(dump));

	if (MALI_ERROR_NONE == err)
	{
		err = dump.header.ret;
	}

	return err;
}
STATIC u64 get_available_gpu_memory(  base_context *ctx )
{
    osu_errcode err;
    osu_cpu_props props;
    CDBG_ASSERT_POINTER( ctx );

    UNUSED( ctx );

    /* Assumes all memory is OS memory; does not account for dedicated allocators */
    err = osu_cpu_props_get( &props );

    if ( err != OSU_ERR_OK )
    {
        CDBG_PRINT_WARN( CDBG_BASE, "osu_cpu_props_get() failed, available gpu memory will be reported as 0" );
        return 0;
    }

    return props.available_memory_size;
}
mali_error base_uk_ctx_alloc(base_context * base_ctx, u32 flags)
{
	size_t pool_size = BASEP_JCTX_RB_NRPAGES << 12; /* convert ring buffer pages to bytes (page size taken as 1 << 12) */

	uku_client_version client_version;
	uku_open_status open_status;

	CDBG_ASSERT_POINTER(base_ctx);

	if ( BDBG_SIMULATE_FAILURE( CDBG_BASE ) )
	{ 
		return MALI_ERROR_FUNCTION_FAILED;
	}

	if(OSU_ERR_OK != osu_sem_init(&base_ctx->shutdown_sem, 0) )
	{
		goto fail_sem_init;
	}
	base_ctx->jd_context.size = pool_size;

	client_version.major = BASE_UK_VERSION_MAJOR;
	client_version.minor = BASE_UK_VERSION_MINOR;
	open_status = uku_open(UK_CLIENT_MALI_T600_BASE, 0, &client_version, &base_ctx->uk_ctx);
	if (UKU_OPEN_OK != open_status)
	{
		goto fail_open;
	}
	if (MALI_ERROR_NONE != basep_map_ringbuffer(base_ctx))
	{
		goto fail_rb_map;
	}

	return MALI_ERROR_NONE;

fail_rb_map:
	uku_close(&base_ctx->uk_ctx);
fail_open:
	osu_sem_term(&base_ctx->shutdown_sem);
fail_sem_init:
	return MALI_ERROR_FUNCTION_FAILED;
}
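/*
 * Usage sketch (illustrative): the allocation above is normally paired with
 * base_uk_ctx_terminate() (earlier in this file), which unmaps the ring
 * buffer and closes the kernel session.
 *
 *   if (MALI_ERROR_NONE != base_uk_ctx_alloc(base_ctx, flags))
 *   {
 *       return MALI_ERROR_FUNCTION_FAILED;
 *   }
 *   ... use the context ...
 *   base_uk_ctx_terminate(base_ctx);
 */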
base_tmem_handle base_tmem_alloc_growable(base_context *base_ctx, u32 max_pages,
					  u32 init_pages, u32 extent_pages,
					  u32 flags)
{
	mali_error err;
	kbase_uk_tmem_alloc tmem;

	CDBG_ASSERT_POINTER(base_ctx);
	CDBG_ASSERT(0 == (flags & BASEP_MEM_IS_CACHED));

	if ( BDBG_SIMULATE_FAILURE( CDBG_BASE ) )
	{
		return BASE_TMEM_INVALID_HANDLE;
	}

	tmem.header.id  = KBASE_FUNC_TMEM_ALLOC;
	tmem.vsize  = max_pages;
#if MALI_HW_TYPE == 2
	tmem.psize  = init_pages;
#else
	tmem.psize = max_pages;
#endif
	tmem.extent = extent_pages;
	tmem.flags  = flags;

	err = uku_call(&base_ctx->uk_ctx, &tmem, sizeof(tmem));
	if (MALI_ERROR_NONE == err)
	{
		err = tmem.header.ret;
		if (MALI_ERROR_NONE == err)
		{
			return tmem.gpu_addr;
		}
	}

	return 0ULL;
}
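/*
 * Usage sketch (illustrative): allocating a growable region and freeing it
 * with base_tmem_free() (earlier in this file). The page counts are
 * hypothetical; flags must not include BASEP_MEM_IS_CACHED, as asserted
 * above, and a return of 0ULL indicates failure.
 *
 *   base_tmem_handle h;
 *   h = base_tmem_alloc_growable(base_ctx, max_pages, init_pages,
 *                                extent_pages, 0u);
 *   if (0ULL != h)
 *   {
 *       ... use the allocation ...
 *       base_tmem_free(base_ctx, h);
 *   }
 */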
void base_uk_ctx_free(base_context *base_ctx)
{
	CDBG_ASSERT_POINTER(base_ctx);
}
base_jd_atom *base_atom_get_address(const base_context *ctx, size_t offset)
{
	CDBG_ASSERT_POINTER(ctx);
	return (base_jd_atom *)((uintptr_t)ctx->jd_context.pool + offset);
}
static void basep_syncset_dump( base_context *base_ctx, base_syncset *sset )
{
	mali_error                err;
	kbase_uk_find_cpu_mapping find;

	CDBG_ASSERT_POINTER( base_ctx );
	CDBG_ASSERT_POINTER( sset );

	/* Call the kernel module to find the relevant CPU mapping of the memory
	   allocation within which synchronization is required. This call is
	   separate from KBASE_FUNC_SYNC so we can dump syncset information in
	   user mode. */
	find.header.id = KBASE_FUNC_FIND_CPU_MAPPING;

	find.gpu_addr = sset->basep_sset.mem_handle;
	find.cpu_addr = sset->basep_sset.user_addr;
	find.size = sset->basep_sset.size;

	err = uku_call(&base_ctx->uk_ctx, &find, sizeof(find));

	if ( MALI_ERROR_NONE == err )
	{
		mali_size64 sync_offset, map_offset;
		mali_addr64 gpu_va;

		/* Calculate the offset (in bytes) of the cache coherency operation from the
		   start of the sub-region mapped for CPU access. */
		CDBG_ASSERT_LEQ_U( find.uaddr, sset->basep_sset.user_addr );
		sync_offset = sset->basep_sset.user_addr - find.uaddr;
		
		/* Calculate the offset (in bytes) of the sub-region mapped for CPU access
		   from the start of the allocation. */
		CDBG_ASSERT_LEQ_U( find.page_off, U64_MAX >> CONFIG_CPU_PAGE_SIZE_LOG2 );
		map_offset = find.page_off << CONFIG_CPU_PAGE_SIZE_LOG2;

		/* Calculate the GPU virtual address of the start of the memory to be
		   synchronized, assuming the memory handle is actually the GPU virtual
		   address of the start of the allocation. */
		CDBG_ASSERT_LEQ_U( map_offset, U64_MAX - sync_offset );
		CDBG_ASSERT_LEQ_U( sset->basep_sset.mem_handle, U64_MAX - sync_offset - map_offset );
		gpu_va = sset->basep_sset.mem_handle + map_offset + sync_offset;

		switch ( sset->basep_sset.type )
		{
			case BASE_SYNCSET_OP_MSYNC:
				cinstr_base_dump_syncset_to_gpu( base_ctx,
				                                 gpu_va,
				                                 (void *)(uintptr_t)sset->basep_sset.user_addr,
				                                 sset->basep_sset.size );
				break;
	
			case BASE_SYNCSET_OP_CSYNC:
				cinstr_base_dump_syncset_to_cpu( base_ctx,
				                                 gpu_va,
				                                 (void *)(uintptr_t)sset->basep_sset.user_addr,
				                                 sset->basep_sset.size );
				break;
	
			default:
				CDBG_PRINT_ERROR( CDBG_BASE, "Unknown memory coherency operation type" );
				break;
		}
	}
}
base_jd_atom *base_bag_get_address(const base_context *ctx,
				   const base_jd_bag *bag)
{
	CDBG_ASSERT_POINTER(ctx);
	return (struct base_jd_atom *)((char *)ctx->jd_context.pool + bag->offset);
}
mali_error base_bag_acquire_buffer(base_context *ctx,
				   base_jd_bag *bag, size_t *req_size)
{
	size_t size;
	mali_error ret = MALI_ERROR_NONE;

	CDBG_ASSERT_POINTER(ctx);
	CDBG_ASSERT_POINTER(req_size);
	CDBG_ASSERT(0 != *req_size);

	size = *req_size;
	bag->size = size; /* Let's be optimistic... */
	
	osu_mutex_lock(&ctx->jd_context.lock);

	if (ctx->jd_context.tail >= ctx->jd_context.head) {
		/* fits at the end? */
		if (size <= (ctx->jd_context.size - ctx->jd_context.tail)) {
			bag->offset = ctx->jd_context.tail;
			ctx->jd_context.tail += size;
			goto out;
		}

		/* fits at the beginning? */
		if (size < ctx->jd_context.head) {
			bag->offset = 0;
			ctx->jd_context.tail = size;
			goto out;
		}

		/* Failed: report how much space is actually available */
		if (0 == ctx->jd_context.head) {
			*req_size = ctx->jd_context.size - ctx->jd_context.tail - 1; /* at the end */
		} else {
			/* either at the end (no need to leave one byte free there)
			 * or at the beginning, whichever is larger */
			*req_size = myownmax(ctx->jd_context.size - ctx->jd_context.tail,
					     ctx->jd_context.head - 1);
		}


	} else {
		/* Fits between tail and head? */
		if (size <= (ctx->jd_context.head - ctx->jd_context.tail - 1)) {
			bag->offset = ctx->jd_context.tail;
			ctx->jd_context.tail += size;
			goto out;
		}

		/* Failed */
		*req_size = (ctx->jd_context.head - ctx->jd_context.tail - 1);
	}

	ret = MALI_ERROR_FUNCTION_FAILED;
out:
	if (MALI_ERROR_NONE == ret){
		CUTILS_DLIST_PUSH_BACK(&ctx->jd_context.bag_list, bag,
				       base_jd_bag, entry);
	}

	osu_mutex_unlock(&ctx->jd_context.lock);
	return ret;
}
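/*
 * Usage sketch (illustrative): the ring-buffer flow implemented above is
 * acquire -> fill atoms -> submit -> release. The atom setup step is elided,
 * nr_atoms is an assumption of this sketch, and the bag is treated as a
 * simple array of base_jd_atom when sizing the request.
 *
 *   base_jd_bag bag;
 *   size_t req_size = nr_atoms * sizeof(base_jd_atom);
 *   if (MALI_ERROR_NONE == base_bag_acquire_buffer(ctx, &bag, &req_size))
 *   {
 *       base_jd_atom *atoms = base_bag_get_address(ctx, &bag);
 *       ... initialize atoms[0 .. nr_atoms-1] ...
 *       bag.nr_atoms = nr_atoms;
 *       bag.core_restriction = 0u; (the submit path asserts this is zero)
 *       base_jd_submit_bag(ctx, &bag);
 *       ... wait for completion events ...
 *       base_bag_release_buffer(ctx, &bag);
 *   }
 */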
/*
 * Fill the coherency information for Mali-T600 cores
 *
 * group_info must point to storage for a mali_base_gpu_coherent_group_info
 * structure, which may occupy up to group_info_size bytes.
 */
STATIC mali_error basep_gpu_props_construct_coherency( struct mali_base_gpu_coherent_group_info * const group_info,
        u32 group_info_size,
        base_context *ctx )
{
    u8 *end_of_mem_ptr; /* For debug checks */
    struct mali_base_gpu_coherent_group *current_group;
    u32 num_groups;

    CDBG_ASSERT_POINTER( group_info );
    CDBG_ASSERT_POINTER( ctx );

    end_of_mem_ptr = ((u8*)group_info) + group_info_size;

    UNUSED( ctx );

    /*
     * This assert assumes at least one coherent group, and that structure
     * padding has been allocated for the single-group case
     */
    CDBG_ASSERT_LEQ( (uintptr_t)(group_info+1), (uintptr_t)end_of_mem_ptr );

    num_groups = basep_gpu_props_get_num_coherent_groups( ctx );

    group_info->num_groups = num_groups;
    group_info->num_core_groups = get_num_core_groups( ctx );

    /* Set coherency, using data structure builder functions. Must be
     * initialized to zero first. */
    STDLIB_MEMSET( &group_info->coherency, 0, sizeof(group_info->coherency) );
    midg_mem_features_set_coherent_core_group( &group_info->coherency,
            BASEP_T600_CORE_GROUP_COHERENT_BOOL );
    midg_mem_features_set_coherence_supergroup( &group_info->coherency,
            BASEP_T600_CORE_SUPERGROUP_COHERENT_BOOL );


    current_group = &group_info->group[0];

    switch ( BASEP_DUMMY_GPU_CORES )
    {
    case 1:
    case 2:
    case 3:
    case 4:
        CDBG_ASSERT_LEQ( (uintptr_t)(current_group+1), (uintptr_t)end_of_mem_ptr );
        current_group->core_mask = (1ULL << BASEP_DUMMY_GPU_CORES) - 1ULL;
        current_group->num_cores = BASEP_DUMMY_GPU_CORES;
        current_group->priority_required = BASEP_DUMMY_DEFAULT_JOB_PRIORITY_FOR_COREGROUP;
        /* NOTE: 1 L2 cache present */

        break;

    case 6:
    case 8:
    {
        u32 cores_per_group = BASEP_DUMMY_GPU_CORES/2;

        CDBG_ASSERT_LEQ( (uintptr_t)(current_group+1), (uintptr_t)end_of_mem_ptr );
        current_group->core_mask = (1ULL << cores_per_group) - 1ULL;
        current_group->num_cores = cores_per_group;
        current_group->priority_required = BASEP_DUMMY_DEFAULT_JOB_PRIORITY_FOR_COREGROUP;

        ++current_group;

        /*
         * 6-core systems have a 'gap' in the core mask, meaning that
         * the 2nd coregroup starts at offset 4.
         * 8-core systems naturally have the 2nd coregroup at offset 4
         */
        CDBG_ASSERT_LEQ( (uintptr_t)(current_group+1), (uintptr_t)end_of_mem_ptr );
        current_group->core_mask = ((1ULL << cores_per_group) - 1ULL) << 4;
        current_group->num_cores = cores_per_group;
        current_group->priority_required = BASEP_DUMMY_MIN_JOB_PRIORITY_FOR_COREGROUP;

        /* NOTE: 2 L2 caches present */
    }
    break;

    default:
        CDBG_ASSERT_MSG( MALI_FALSE, "Unreachable\n" );
        break;
    }

    /* end_of_mem_ptr is only used for debug checks */
    UNUSED( end_of_mem_ptr );

    return MALI_ERROR_NONE;
}
/*
 * Public functions
 */
const struct mali_base_gpu_props *_mali_base_get_gpu_props( base_context *ctx )
{
    CDBG_ASSERT_POINTER( ctx );
    /* Just return the information cached in the base ctx */
    return ctx->gpu_props;
}