void base_dep_release(base_context *ctx, base_jd_dep *dep)
{
	CDBG_ASSERT_POINTER(ctx);
	CDBG_ASSERT_POINTER(dep);

	osu_mutex_lock(&ctx->jd_context.lock);
	base_sem_release(ctx, dep->dep[0]);
	base_sem_release(ctx, dep->dep[1]);
	osu_mutex_unlock(&ctx->jd_context.lock);
}
mali_error base_context_hwcnt_enable(base_context *base_ctx, mali_addr64 dump_addr,
                                     u32 jm_bm, u32 shader_bm, u32 tiler_bm,
                                     u32 l3_cache_bm, u32 mmu_l2_bm)
{
	mali_error err;
	kbase_uk_hwcnt_setup hwcnt_setup;

	CDBG_ASSERT_POINTER(base_ctx);
	CDBG_ASSERT(0 == (dump_addr & (2048-1))); /* alignment check */

	hwcnt_setup.header.id = KBASE_FUNC_HWCNT_SETUP;
	hwcnt_setup.dump_buffer = dump_addr;
	hwcnt_setup.jm_bm = jm_bm;
	hwcnt_setup.shader_bm = shader_bm;
	hwcnt_setup.tiler_bm = tiler_bm;
	hwcnt_setup.l3_cache_bm = l3_cache_bm;
	hwcnt_setup.mmu_l2_bm = mmu_l2_bm;

	err = uku_call(&base_ctx->uk_ctx, &hwcnt_setup, sizeof(hwcnt_setup));
	if (MALI_ERROR_NONE == err) {
		err = hwcnt_setup.header.ret;
	}
	return err;
}
void base_uk_ctx_terminate(base_context *base_ctx)
{
	CDBG_ASSERT_POINTER(base_ctx);

	basep_unmap_ringbuffer(base_ctx);
	uku_close(&base_ctx->uk_ctx);
}
u32 basep_gpu_props_get_num_coherent_groups( base_context *ctx )
{
	CDBG_ASSERT_POINTER( ctx );
	UNUSED( ctx );

	switch ( BASEP_DUMMY_GPU_CORES ) {
	case 1:
	case 2:
	case 3:
	case 4:
		return 1;
	case 6:
	case 8:
		return 2;
	default:
		CDBG_ASSERT_MSG( MALI_FALSE, "Unreachable\n" );
		break;
	}
	return 0;
}
void base_jd_submit_bag(base_context *base_ctx, const base_jd_bag *bag)
{
	mali_error err;
	kbase_uk_job_submit ukbag;

	CDBG_ASSERT_POINTER(base_ctx);
	CDBG_ASSERT(MALI_FALSE == base_ctx->dispatch_disabled);
	CDBG_ASSERT_MSG( 0u == bag->core_restriction, "Use of core_restriction is deprecated" );

	ukbag.header.id = KBASE_FUNC_JOB_SUBMIT;
	ukbag.bag_uaddr = (uintptr_t)bag;
	ukbag.core_restriction = 0u;
	ukbag.offset = bag->offset;
	ukbag.size = bag->size;
	ukbag.nr_atoms = bag->nr_atoms;

	err = uku_call(&base_ctx->uk_ctx, &ukbag, sizeof(ukbag));
	CDBG_ASSERT_MSG( err == MALI_ERROR_NONE && ukbag.header.ret == MALI_ERROR_NONE,
	                 "bag submission error swallowed (%d, %d)", err, ukbag.header.ret );
}
base_tmem_handle base_tmem_from_ump(base_context *ctx, ump_secure_id ump_id, u64 * const pages)
{
	mali_error err;
	kbase_uk_tmem_from_ump tmem_ump;

	CDBG_ASSERT_POINTER(ctx);

	tmem_ump.header.id = KBASE_FUNC_TMEM_FROM_UMP;
	tmem_ump.id = ump_id;

	err = uku_call(&ctx->uk_ctx, &tmem_ump, sizeof(tmem_ump));
	if (MALI_ERROR_NONE == err) {
		err = tmem_ump.header.ret;
		if (MALI_ERROR_NONE == err) {
			if (NULL != pages) {
				*pages = tmem_ump.pages;
			}
			return tmem_ump.gpu_addr;
		}
	}
	return 0ULL;
}
void base_bag_release_buffer(base_context *ctx, base_jd_bag *bag)
{
	int is_first;
	base_jd_bag *tmp;
	base_jd_bag *next;

	CDBG_ASSERT_POINTER(ctx);

	osu_mutex_lock(&ctx->jd_context.lock);

	tmp = CUTILS_DLIST_FRONT(&ctx->jd_context.bag_list, base_jd_bag, entry);
	if (bag == tmp) {
		is_first = 1;
	} else {
		is_first = 0;
	}

	next = CUTILS_DLIST_REMOVE_AND_RETURN_NEXT(&ctx->jd_context.bag_list, bag, base_jd_bag, entry);

	if (CUTILS_DLIST_IS_EMPTY(&ctx->jd_context.bag_list)) {
		ctx->jd_context.head = ctx->jd_context.tail;
	} else if (is_first) {
		ctx->jd_context.head = next->offset;
	}

	osu_mutex_unlock(&ctx->jd_context.lock);
}
void base_tmem_free(base_context *base_ctx, base_tmem_handle handle)
{
	mali_error err;

	CDBG_ASSERT_POINTER(base_ctx);

	err = basep_do_mem_free(&base_ctx->uk_ctx, handle);
	CDBG_ASSERT(MALI_ERROR_NONE == err);
}
void base_jd_event_term(base_context *base_ctx)
{
	CDBG_ASSERT_POINTER(base_ctx);

	base_ctx->dispatch_disabled = MALI_TRUE;
	post_term(&base_ctx->uk_ctx);

	/* wait for term recvd */
	osu_sem_wait(&base_ctx->shutdown_sem, 0);
	osu_sem_term(&base_ctx->shutdown_sem);
}
void base_context_reg_trace_disable(base_context *ctx)
{
	CDBG_ASSERT_POINTER(ctx);
	CDBG_ASSERT(NULL != ctx->jd_context.tb);

	if (NULL == ctx->jd_context.tb) {
		/* ignore the unmatched disable call */
		return;
	}
	basep_unmap_trace_buffer(ctx);
}
mali_error base_context_reg_trace_enable(base_context *ctx, size_t trace_buffer_size, void **mapping)
{
	mali_error err;

	CDBG_ASSERT_POINTER(ctx);
	CDBG_ASSERT_POINTER(mapping);
	CDBG_ASSERT(NULL == ctx->jd_context.tb);

	err = basep_map_trace_buffer(ctx, trace_buffer_size);
	if (MALI_ERROR_NONE != err) {
		return err;
	}

	*mapping = ctx->jd_context.tb;
	return MALI_ERROR_NONE;
}
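/*
 * Illustrative sketch (not part of the original file): one plausible way a
 * client could pair base_context_reg_trace_enable() with
 * base_context_reg_trace_disable(). The 16 kB buffer size and the example_
 * function name are arbitrary example values, not an API defined here.
 */
static void example_reg_trace_usage(base_context *ctx)
{
	void *trace_mapping;

	if (MALI_ERROR_NONE == base_context_reg_trace_enable(ctx, 16 * 1024, &trace_mapping)) {
		/* ... inspect register trace data via trace_mapping ... */
		base_context_reg_trace_disable(ctx);
	}
}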
void base_context_hwcnt_disable(base_context *base_ctx)
{
	kbase_uk_hwcnt_setup hwcnt_setup;
	mali_error err;

	CDBG_ASSERT_POINTER(base_ctx);

	/* A zero dump buffer address tells the kernel to disable counter collection */
	hwcnt_setup.header.id = KBASE_FUNC_HWCNT_SETUP;
	hwcnt_setup.dump_buffer = 0ULL;

	err = uku_call(&base_ctx->uk_ctx, &hwcnt_setup, sizeof(hwcnt_setup));
	CDBG_ASSERT_MSG( err == MALI_ERROR_NONE && hwcnt_setup.header.ret == MALI_ERROR_NONE,
	                 "disabling hw counter failed; swallowing error (%d, %d)",
	                 err, hwcnt_setup.header.ret );
}
mali_error basep_fill_gpu_props( struct mali_base_gpu_props *const gpu_props, u32 props_size, base_context *ctx )
{
	mali_error err;

	CDBG_ASSERT_POINTER( gpu_props );
	CDBG_ASSERT_POINTER( ctx );

	/* Copy everything but the coherency information */
	STDLIB_MEMCPY( gpu_props, &basep_gpu_props_template,
	               offsetof(struct mali_base_gpu_props, coherency_info) );

	/* Modify members that need an update */
	gpu_props->core_props.gpu_available_memory_size = get_available_gpu_memory( ctx );

	/* Construct the coherency information */
	err = basep_gpu_props_construct_coherency( &gpu_props->coherency_info,
	                                           props_size - offsetof(struct mali_base_gpu_props, coherency_info),
	                                           ctx );
	return err;
}
mali_error base_dep_acquire(base_context *ctx, base_jd_dep *dep, int nr)
{
	mali_error ret = MALI_ERROR_NONE;
	int i;

	CDBG_ASSERT_POINTER(ctx);
	CDBG_ASSERT_POINTER(dep);
	CDBG_ASSERT((nr == 1) || (nr == 2));

	/* Please turn this into spin/cheap locks... */
	osu_mutex_lock(&ctx->jd_context.lock);

	for (i = 0; i < nr; i++) {
		dep->dep[i] = base_sem_acquire(ctx);
		if (!dep->dep[i]) {
			break;
		}
	}

	if (i < nr) {
		/* Not all semaphores could be acquired; release any we did get */
		if (1 == i) {
			base_sem_release(ctx, dep->dep[0]);
		}
		ret = MALI_ERROR_FUNCTION_FAILED;
	}

	/*
	 * If we were asked for a single dependency, make sure we wipe
	 * the second one. It helps keep the release code simple.
	 */
	if (nr == 1) {
		dep->dep[1] = 0;
	}

	osu_mutex_unlock(&ctx->jd_context.lock);
	return ret;
}
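/*
 * Illustrative sketch (not part of the original file): acquiring two
 * dependency semaphores and releasing them again. The error handling mirrors
 * the MALI_ERROR_FUNCTION_FAILED return of base_dep_acquire() above; how the
 * dependency would actually be attached to atoms in between is omitted, and
 * the example_ function name is hypothetical.
 */
static mali_error example_dep_usage(base_context *ctx)
{
	base_jd_dep dep;

	if (MALI_ERROR_NONE != base_dep_acquire(ctx, &dep, 2)) {
		return MALI_ERROR_FUNCTION_FAILED; /* no dependency semaphores available */
	}

	/* ... use the dependency with a job chain here ... */

	base_dep_release(ctx, &dep);
	return MALI_ERROR_NONE;
}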
mali_error base_context_hwcnt_dump(base_context *base_ctx)
{
	mali_error err;
	kbase_uk_hwcnt_dump dump;

	CDBG_ASSERT_POINTER(base_ctx);

	dump.header.id = KBASE_FUNC_HWCNT_DUMP;

	err = uku_call(&base_ctx->uk_ctx, &dump, sizeof(dump));
	if (MALI_ERROR_NONE == err) {
		err = dump.header.ret;
	}
	return err;
}
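/*
 * Illustrative sketch (not part of the original file): a minimal hardware
 * counter session built from base_context_hwcnt_enable(),
 * base_context_hwcnt_dump() and base_context_hwcnt_disable() above. The dump
 * buffer GPU address is taken as a parameter and must satisfy the 2048-byte
 * alignment asserted by the enable function; the all-ones bitmasks and the
 * example_ function name are assumptions made for the sketch only.
 */
static void example_hwcnt_session(base_context *ctx, mali_addr64 dump_gpu_va)
{
	if (MALI_ERROR_NONE == base_context_hwcnt_enable(ctx, dump_gpu_va,
	                                                 0xFFFFFFFFu, 0xFFFFFFFFu,
	                                                 0xFFFFFFFFu, 0xFFFFFFFFu,
	                                                 0xFFFFFFFFu)) {
		/* Request one dump into dump_gpu_va, then stop collection */
		(void)base_context_hwcnt_dump(ctx);
		base_context_hwcnt_disable(ctx);
	}
}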
STATIC u64 get_available_gpu_memory( base_context *ctx )
{
	osu_errcode err;
	osu_cpu_props props;

	CDBG_ASSERT_POINTER( ctx );
	UNUSED( ctx );

	/* Assumes all OS Memory. Does not account for dedicated allocators */
	err = osu_cpu_props_get( &props );
	if ( err != OSU_ERR_OK ) {
		CDBG_PRINT_WARN( CDBG_BASE,
		                 "osu_cpu_props_get() failed, available gpu memory will be reported as 0" );
		return 0;
	}
	return props.available_memory_size;
}
mali_error base_uk_ctx_alloc(base_context *base_ctx, u32 flags)
{
	size_t pool_size = BASEP_JCTX_RB_NRPAGES << 12; /* ring buffer pages to bytes (4 kB pages) */
	uku_client_version client_version;
	uku_open_status open_status;

	CDBG_ASSERT_POINTER(base_ctx);

	if ( BDBG_SIMULATE_FAILURE( CDBG_BASE ) ) {
		return MALI_ERROR_FUNCTION_FAILED;
	}

	if (OSU_ERR_OK != osu_sem_init(&base_ctx->shutdown_sem, 0)) {
		goto fail_sem_init;
	}

	base_ctx->jd_context.size = pool_size;

	client_version.major = BASE_UK_VERSION_MAJOR;
	client_version.minor = BASE_UK_VERSION_MINOR;

	open_status = uku_open(UK_CLIENT_MALI_T600_BASE, 0, &client_version, &base_ctx->uk_ctx);
	if (UKU_OPEN_OK != open_status) {
		goto fail_open;
	}

	if (MALI_ERROR_NONE != basep_map_ringbuffer(base_ctx)) {
		goto fail_rb_map;
	}

	return MALI_ERROR_NONE;

fail_rb_map:
	uku_close(&base_ctx->uk_ctx);
fail_open:
	osu_sem_term(&base_ctx->shutdown_sem);
fail_sem_init:
	return MALI_ERROR_FUNCTION_FAILED;
}
base_tmem_handle base_tmem_alloc_growable(base_context *base_ctx, u32 max_pages,
                                          u32 init_pages, u32 extent_pages, u32 flags)
{
	mali_error err;
	kbase_uk_tmem_alloc tmem;

	CDBG_ASSERT_POINTER(base_ctx);
	CDBG_ASSERT(0 == (flags & BASEP_MEM_IS_CACHED));

	if ( BDBG_SIMULATE_FAILURE( CDBG_BASE ) ) {
		return BASE_TMEM_INVALID_HANDLE;
	}

	tmem.header.id = KBASE_FUNC_TMEM_ALLOC;
	tmem.vsize = max_pages;
#if MALI_HW_TYPE == 2
	tmem.psize = init_pages;
#else
	tmem.psize = max_pages;
#endif
	tmem.extent = extent_pages;
	tmem.flags = flags;

	err = uku_call(&base_ctx->uk_ctx, &tmem, sizeof(tmem));
	if (MALI_ERROR_NONE == err) {
		err = tmem.header.ret;
		if (MALI_ERROR_NONE == err) {
			return tmem.gpu_addr;
		}
	}
	return 0ULL;
}
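/*
 * Illustrative sketch (not part of the original file): allocating a growable
 * target memory region and freeing it with base_tmem_free() above. The page
 * counts, the zero flags value and the example_ function name are arbitrary
 * example values, and the failure check assumes BASE_TMEM_INVALID_HANDLE is
 * equivalent to the 0ULL failure return used by base_tmem_alloc_growable().
 */
static void example_tmem_usage(base_context *ctx)
{
	/* 256 pages max, 16 committed up front, grow in steps of 16 pages */
	base_tmem_handle handle = base_tmem_alloc_growable(ctx, 256, 16, 16, 0);

	if (BASE_TMEM_INVALID_HANDLE != handle) {
		/* ... use the allocation ... */
		base_tmem_free(ctx, handle);
	}
}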
void base_uk_ctx_free(base_context *base_ctx)
{
	CDBG_ASSERT_POINTER(base_ctx);
}
base_jd_atom *base_atom_get_address(const base_context *ctx, size_t offset)
{
	CDBG_ASSERT_POINTER(ctx);
	return (base_jd_atom *)((uintptr_t)ctx->jd_context.pool + offset);
}
static void basep_syncset_dump( base_context *base_ctx, base_syncset *sset )
{
	mali_error err;
	kbase_uk_find_cpu_mapping find;

	CDBG_ASSERT_POINTER( base_ctx );
	CDBG_ASSERT_POINTER( sset );

	/* Call the kernel module to find the relevant CPU mapping of the memory
	   allocation within which synchronization is required. This call is
	   separate from KBASE_FUNC_SYNC so we can dump syncset information in
	   user mode. */
	find.header.id = KBASE_FUNC_FIND_CPU_MAPPING;
	find.gpu_addr = sset->basep_sset.mem_handle;
	find.cpu_addr = sset->basep_sset.user_addr;
	find.size = sset->basep_sset.size;

	err = uku_call(&base_ctx->uk_ctx, &find, sizeof(find));
	if ( MALI_ERROR_NONE == err ) {
		mali_size64 sync_offset, map_offset;
		mali_addr64 gpu_va;

		/* Calculate the offset (in bytes) of the cache coherency operation
		   from the start of the sub-region mapped for CPU access. */
		CDBG_ASSERT_LEQ_U( find.uaddr, sset->basep_sset.user_addr );
		sync_offset = sset->basep_sset.user_addr - find.uaddr;

		/* Calculate the offset (in bytes) of the sub-region mapped for CPU
		   access from the start of the allocation. */
		CDBG_ASSERT_LEQ_U( find.page_off, U64_MAX >> CONFIG_CPU_PAGE_SIZE_LOG2 );
		map_offset = find.page_off << CONFIG_CPU_PAGE_SIZE_LOG2;

		/* Calculate the GPU virtual address of the start of the memory to be
		   synchronized, assuming the memory handle is actually the GPU
		   virtual address of the start of the allocation. */
		CDBG_ASSERT_LEQ_U( map_offset, U64_MAX - sync_offset );
		CDBG_ASSERT_LEQ_U( sset->basep_sset.mem_handle, U64_MAX - sync_offset - map_offset );
		gpu_va = sset->basep_sset.mem_handle + map_offset + sync_offset;

		switch ( sset->basep_sset.type ) {
		case BASE_SYNCSET_OP_MSYNC:
			cinstr_base_dump_syncset_to_gpu( base_ctx, gpu_va,
			                                 (void *)(uintptr_t)sset->basep_sset.user_addr,
			                                 sset->basep_sset.size );
			break;

		case BASE_SYNCSET_OP_CSYNC:
			cinstr_base_dump_syncset_to_cpu( base_ctx, gpu_va,
			                                 (void *)(uintptr_t)sset->basep_sset.user_addr,
			                                 sset->basep_sset.size );
			break;

		default:
			CDBG_PRINT_ERROR( CDBG_BASE, "Unknown memory coherency operation type" );
			break;
		}
	}
}
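/*
 * Worked example of the address arithmetic in basep_syncset_dump() above
 * (illustrative only, not part of the original file). All values are assumed:
 * a 4 kB CPU page size (CONFIG_CPU_PAGE_SIZE_LOG2 == 12), a sub-mapping that
 * starts find.page_off = 3 pages into an allocation whose handle/GPU VA is
 * mem_handle = 0x10000000, a CPU mapping base of find.uaddr = 0x40000000 and
 * a sync request at user_addr = 0x40000100:
 *
 *   sync_offset = 0x40000100 - 0x40000000      = 0x100
 *   map_offset  = 3 << 12                      = 0x3000
 *   gpu_va      = 0x10000000 + 0x3000 + 0x100  = 0x10003100
 */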
base_jd_atom *base_bag_get_address(const base_context *ctx, const base_jd_bag *bag)
{
	CDBG_ASSERT_POINTER(ctx);
	return (struct base_jd_atom *)((char *)ctx->jd_context.pool + bag->offset);
}
mali_error base_bag_acquire_buffer(base_context *ctx, base_jd_bag *bag, size_t *req_size)
{
	size_t size;
	mali_error ret = MALI_ERROR_NONE;

	CDBG_ASSERT_POINTER(ctx);
	CDBG_ASSERT_POINTER(req_size);
	CDBG_ASSERT(0 != *req_size);

	size = *req_size;
	bag->size = size; /* Let's be optimistic... */

	osu_mutex_lock(&ctx->jd_context.lock);

	if (ctx->jd_context.tail >= ctx->jd_context.head) {
		/* Fits at the end? */
		if (size <= (ctx->jd_context.size - ctx->jd_context.tail)) {
			bag->offset = ctx->jd_context.tail;
			ctx->jd_context.tail += size;
			goto out;
		}
		/* Fits at the beginning? */
		if (size < ctx->jd_context.head) {
			bag->offset = 0;
			ctx->jd_context.tail = size;
			goto out;
		}
		/* Failed: report how much space can actually be offered */
		if (0 == ctx->jd_context.head) {
			/* At the end */
			*req_size = (ctx->jd_context.size - ctx->jd_context.tail - 1);
		} else {
			/* At the end (no need to leave one byte spare there), or at the beginning */
			*req_size = myownmax((ctx->jd_context.size - ctx->jd_context.tail),
			                     ctx->jd_context.head - 1);
		}
	} else {
		/* Fits between tail and head? */
		if (size <= (ctx->jd_context.head - ctx->jd_context.tail - 1)) {
			bag->offset = ctx->jd_context.tail;
			ctx->jd_context.tail += size;
			goto out;
		}
		/* Failed */
		*req_size = (ctx->jd_context.head - ctx->jd_context.tail - 1);
	}
	ret = MALI_ERROR_FUNCTION_FAILED;

out:
	if (MALI_ERROR_NONE == ret) {
		CUTILS_DLIST_PUSH_BACK(&ctx->jd_context.bag_list, bag, base_jd_bag, entry);
	}
	osu_mutex_unlock(&ctx->jd_context.lock);
	return ret;
}
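/*
 * Illustrative sketch (not part of the original file): the bag workflow built
 * from the functions in this file - acquire ring-buffer space, locate the
 * atoms, submit, and release the space once the chain has completed. It
 * assumes two fixed-size atoms stored back to back, and releases the bag
 * immediately after submission for brevity; in practice the release would
 * normally happen only after the corresponding completion events have been
 * received. The example_ function name is hypothetical.
 */
static mali_error example_bag_submit(base_context *ctx)
{
	base_jd_bag bag;
	size_t req_size = 2 * sizeof(base_jd_atom);
	base_jd_atom *atoms;

	if (MALI_ERROR_NONE != base_bag_acquire_buffer(ctx, &bag, &req_size)) {
		/* req_size now holds the largest contiguous size currently available */
		return MALI_ERROR_FUNCTION_FAILED;
	}

	atoms = base_bag_get_address(ctx, &bag);
	/* ... fill in atoms[0] and atoms[1] ... */
	UNUSED(atoms);

	bag.core_restriction = 0u; /* deprecated, must be zero (see base_jd_submit_bag) */
	bag.nr_atoms = 2;
	base_jd_submit_bag(ctx, &bag);

	/* ... wait for the job completion events ... */
	base_bag_release_buffer(ctx, &bag);
	return MALI_ERROR_NONE;
}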
/*
 * Fill the coherency information for Mali-T600 cores.
 *
 * group_info must point to storage for a mali_base_gpu_coherent_group_info
 * structure which is up to group_info_size bytes in length.
 */
STATIC mali_error basep_gpu_props_construct_coherency( struct mali_base_gpu_coherent_group_info * const group_info,
                                                       u32 group_info_size, base_context *ctx )
{
	u8 *end_of_mem_ptr; /* For debug checks */
	struct mali_base_gpu_coherent_group *current_group;
	u32 num_groups;

	CDBG_ASSERT_POINTER( group_info );
	CDBG_ASSERT_POINTER( ctx );

	end_of_mem_ptr = ((u8*)group_info) + group_info_size;
	UNUSED( ctx );

	/*
	 * This assert assumes at least one coherent group, and that structure
	 * padding has been allocated for the single group case
	 */
	CDBG_ASSERT_LEQ( (uintptr_t)(group_info+1), (uintptr_t)end_of_mem_ptr );

	num_groups = basep_gpu_props_get_num_coherent_groups( ctx );

	group_info->num_groups = num_groups;
	group_info->num_core_groups = get_num_core_groups( ctx );

	/* Set coherency, using data structure builder functions. Must be
	 * initialized to zero first. */
	STDLIB_MEMSET( &group_info->coherency, 0, sizeof(group_info->coherency) );
	midg_mem_features_set_coherent_core_group( &group_info->coherency,
	                                           BASEP_T600_CORE_GROUP_COHERENT_BOOL );
	midg_mem_features_set_coherence_supergroup( &group_info->coherency,
	                                            BASEP_T600_CORE_SUPERGROUP_COHERENT_BOOL );

	current_group = &group_info->group[0];

	switch ( BASEP_DUMMY_GPU_CORES ) {
	case 1:
	case 2:
	case 3:
	case 4:
		CDBG_ASSERT_LEQ( (uintptr_t)(current_group+1), (uintptr_t)end_of_mem_ptr );
		current_group->core_mask = (1ULL << BASEP_DUMMY_GPU_CORES) - 1ULL;
		current_group->num_cores = BASEP_DUMMY_GPU_CORES;
		current_group->priority_required = BASEP_DUMMY_DEFAULT_JOB_PRIORITY_FOR_COREGROUP;
		/* NOTE: 1 L2 cache present */
		break;

	case 6:
	case 8:
	{
		u32 cores_per_group = BASEP_DUMMY_GPU_CORES/2;

		CDBG_ASSERT_LEQ( (uintptr_t)(current_group+1), (uintptr_t)end_of_mem_ptr );
		current_group->core_mask = (1ULL << cores_per_group) - 1ULL;
		current_group->num_cores = cores_per_group;
		current_group->priority_required = BASEP_DUMMY_DEFAULT_JOB_PRIORITY_FOR_COREGROUP;
		++current_group;

		/*
		 * 6-core systems have a 'gap' in the core mask, meaning that
		 * the 2nd coregroup starts at offset 4.
		 * 8-core systems naturally have the 2nd coregroup at offset 4.
		 */
		CDBG_ASSERT_LEQ( (uintptr_t)(current_group+1), (uintptr_t)end_of_mem_ptr );
		current_group->core_mask = ((1ULL << cores_per_group) - 1ULL) << 4;
		current_group->num_cores = cores_per_group;
		current_group->priority_required = BASEP_DUMMY_MIN_JOB_PRIORITY_FOR_COREGROUP;
		/* NOTE: 2 L2 caches present */
	}
	break;

	default:
		CDBG_ASSERT_MSG( MALI_FALSE, "Unreachable\n" );
		break;
	}

	/* end_of_mem_ptr is only used for debug checks */
	UNUSED( end_of_mem_ptr );

	return MALI_ERROR_NONE;
}
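/*
 * Illustrative note (not part of the original file): the core masks produced
 * by the switch in basep_gpu_props_construct_coherency() above, derived
 * directly from the code:
 *
 *   BASEP_DUMMY_GPU_CORES == 6: cores_per_group = 3
 *     group[0].core_mask = 0x07  (cores 0-2)
 *     group[1].core_mask = 0x70  (cores 4-6, i.e. a 'gap' at core 3)
 *
 *   BASEP_DUMMY_GPU_CORES == 8: cores_per_group = 4
 *     group[0].core_mask = 0x0F  (cores 0-3)
 *     group[1].core_mask = 0xF0  (cores 4-7)
 */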
/*
 * Public functions
 */

const struct mali_base_gpu_props *_mali_base_get_gpu_props( base_context *ctx )
{
	CDBG_ASSERT_POINTER( ctx );

	/* Just return the information cached in the base ctx */
	return ctx->gpu_props;
}