void exynos_hwcnt_init(struct kbase_device *kbdev)
{
    struct kbase_uk_hwcnt_setup setup_arg;
    struct kbase_context *kctx;
    struct kbase_uk_mem_alloc mem;
    struct kbase_va_region *reg;
    struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context;

    if (platform->hwcnt_gathering_status == false)
        goto out;

    kctx = kbase_create_context(kbdev, false);
    if (kctx) {
        kbdev->hwcnt.kctx = kctx;
    } else {
        GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "hwcnt error! hwcnt_init failed\n");
        goto out;
    }

    /* Allocate a single page the GPU can write counter dumps into */
    mem.va_pages = mem.commit_pages = mem.extent = 1;
    mem.flags = BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_RD | BASE_MEM_HINT_CPU_RD;

    reg = kbase_mem_alloc(kctx, mem.va_pages, mem.commit_pages, mem.extent,
                          &mem.flags, &mem.gpu_va, &mem.va_alignment);
    /* Guard against allocation failure before reg is dereferenced below */
    if (!reg)
        goto out;

#if defined(CONFIG_64BIT)
    kbase_gpu_vm_lock(kctx);
    if (MALI_ERROR_NONE != kbase_gpu_mmap(kctx, reg, 0, 1, 1)) {
        kbase_gpu_vm_unlock(kctx);
        platform->hwcnt_gathering_status = false;
        GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "exynos_hwcnt_init error! mmap fail\n");
        kbase_mem_free(kbdev->hwcnt.kctx, kbdev->hwcnt.suspended_state.dump_buffer);
        goto out;
    }
    kbase_gpu_vm_unlock(kctx);
#endif

    kctx->kbdev->hwcnt.phy_addr = reg->alloc->pages[0];
    kctx->kbdev->hwcnt.enable_for_utilization = FALSE;
    kctx->kbdev->hwcnt.enable_for_gpr = FALSE;
    kctx->kbdev->hwcnt.suspended_kctx = NULL;
    kctx->kbdev->hwcnt.timeout = msecs_to_jiffies(100);
    kctx->kbdev->hwcnt.is_powered = FALSE;
    mutex_init(&kbdev->hwcnt.mlock);

#if defined(CONFIG_64BIT)
    setup_arg.dump_buffer = reg->start_pfn << PAGE_SHIFT;
#else
    setup_arg.dump_buffer = mem.gpu_va;
#endif
    setup_arg.jm_bm = platform->hwcnt_choose_jm;
    setup_arg.shader_bm = platform->hwcnt_choose_shader;
    setup_arg.tiler_bm = platform->hwcnt_choose_tiler;
    setup_arg.l3_cache_bm = platform->hwcnt_choose_l3_cache;
    setup_arg.mmu_l2_bm = platform->hwcnt_choose_mmu_l2;
    setup_arg.padding = HWC_MODE_UTILIZATION;

    kctx->kbdev->hwcnt.kspace_addr = kbase_kmap_from_physical_address(kbdev);

    if (MALI_ERROR_NONE != hwcnt_setup(kctx, &setup_arg)) {
        GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "hwcnt_setup failed\n");
        goto out;
    }

    kctx->kbdev->hwcnt.acc_buffer = kmalloc(HWC_ACC_BUFFER_SIZE, GFP_KERNEL);
    if (kctx->kbdev->hwcnt.acc_buffer)
        memset(kctx->kbdev->hwcnt.acc_buffer, 0, HWC_ACC_BUFFER_SIZE);
    else
        goto out;

    kbdev->hwcnt.is_init = TRUE;

    /* With the always-on power policy, counters can be started immediately */
    if (kbdev->pm.pm_current_policy->id == KBASE_PM_POLICY_ID_ALWAYS_ON) {
        mutex_lock(&kbdev->hwcnt.mlock);
        if (!kbdev->hwcnt.kctx)
            hwcnt_start(kbdev);
        mutex_unlock(&kbdev->hwcnt.mlock);
    }

    return;

out:
    kbdev->hwcnt.is_init = FALSE;
    return;
}
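On success this routine leaves several resources live: the dump context, the one-page dump buffer, the kernel counter mapping, and the accumulation buffer, with kbdev->hwcnt.is_init as the flag other paths test. A matching teardown has to release them. The sketch below is a hypothetical counterpart, not the vendor's actual remove routine; it uses only helpers and fields visible in the listing above, and assumes destroying the context also releases memory allocated against it.

/* Hypothetical teardown mirroring exynos_hwcnt_init(); a sketch, not
 * the vendor's shipped routine. Frees resources in reverse order of
 * acquisition; destroying the context is assumed to release the dump
 * page allocated against it. */
static void exynos_hwcnt_remove_sketch(struct kbase_device *kbdev)
{
    if (kbdev->hwcnt.is_init == FALSE)
        return;

    kfree(kbdev->hwcnt.acc_buffer);
    kbdev->hwcnt.acc_buffer = NULL;

    if (kbdev->hwcnt.kctx) {
        kbase_destroy_context(kbdev->hwcnt.kctx);
        kbdev->hwcnt.kctx = NULL;
    }

    kbdev->hwcnt.is_init = FALSE;
}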
struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
{
    struct kbase_gator_hwcnt_handles *hand;
    struct kbase_uk_hwcnt_setup setup;
    int err;
    uint32_t dump_size = 0, i = 0;
    struct kbase_va_region *reg;
    u64 flags;
    u64 nr_pages;
    u16 va_alignment = 0;

    if (!in_out_info)
        return NULL;

    hand = kzalloc(sizeof(*hand), GFP_KERNEL);
    if (!hand)
        return NULL;

    /* Get the first device */
    hand->kbdev = kbase_find_device(-1);
    if (!hand->kbdev)
        goto free_hand;

    /* Create a kbase_context */
    hand->kctx = kbase_create_context(hand->kbdev, true);
    if (!hand->kctx)
        goto release_device;

    in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
    in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
    in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;

    /* If we are using a v4 device (Mali-T6xx or Mali-T72x) */
    if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) {
        uint32_t cg, j;
        uint64_t core_mask;

        /* There are 8 hardware counter blocks per core group */
        in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) *
                MALI_MAX_NUM_BLOCKS_PER_GROUP *
                in_out_info->nr_core_groups, GFP_KERNEL);
        if (!in_out_info->hwc_layout)
            goto destroy_context;

        dump_size = in_out_info->nr_core_groups *
            MALI_MAX_NUM_BLOCKS_PER_GROUP *
            MALI_COUNTERS_PER_BLOCK *
            MALI_BYTES_PER_COUNTER;

        for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
            core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;

            for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
                if (core_mask & (1u << j))
                    in_out_info->hwc_layout[i++] = SHADER_BLOCK;
                else
                    in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
            }

            in_out_info->hwc_layout[i++] = TILER_BLOCK;
            in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
            in_out_info->hwc_layout[i++] = RESERVED_BLOCK;

            if (0 == cg)
                in_out_info->hwc_layout[i++] = JM_BLOCK;
            else
                in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
        }
    /* If we are using any other device */
    } else {
        uint32_t nr_l2, nr_sc_bits, j;
        uint64_t core_mask;

        nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;
        core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;
        nr_sc_bits = fls64(core_mask);

        /* The job manager and tiler sets of counters are always present */
        in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL);
        if (!in_out_info->hwc_layout)
            goto destroy_context;

        dump_size = (2 + nr_sc_bits + nr_l2) *
            MALI_COUNTERS_PER_BLOCK *
            MALI_BYTES_PER_COUNTER;

        in_out_info->hwc_layout[i++] = JM_BLOCK;
        in_out_info->hwc_layout[i++] = TILER_BLOCK;

        for (j = 0; j < nr_l2; j++)
            in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;

        while (core_mask != 0ull) {
            if ((core_mask & 1ull) != 0ull)
                in_out_info->hwc_layout[i++] = SHADER_BLOCK;
            else
                in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
            core_mask >>= 1;
        }
    }

    in_out_info->nr_hwc_blocks = i;
    in_out_info->size = dump_size;

    flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR;
    nr_pages = PFN_UP(dump_size);
    reg = kbase_mem_alloc(hand->kctx, nr_pages, nr_pages, 0,
                          &flags, &hand->hwcnt_gpu_va, &va_alignment);
    if (!reg)
        goto free_layout;

    hand->hwcnt_cpu_va = kbase_vmap(hand->kctx, hand->hwcnt_gpu_va, dump_size, &hand->hwcnt_map);
    if (!hand->hwcnt_cpu_va)
        goto free_buffer;

    in_out_info->kernel_dump_buffer = hand->hwcnt_cpu_va;
    memset(in_out_info->kernel_dump_buffer, 0, nr_pages * PAGE_SIZE);

    /* The GPU writes dumps through its own VA, not the kernel mapping */
    setup.dump_buffer = hand->hwcnt_gpu_va;
    setup.jm_bm = in_out_info->bitmask[0];
    setup.tiler_bm = in_out_info->bitmask[1];
    setup.shader_bm = in_out_info->bitmask[2];
    setup.mmu_l2_bm = in_out_info->bitmask[3];

    err = kbase_instr_hwcnt_enable(hand->kctx, &setup);
    if (err)
        goto free_unmap;

    kbase_instr_hwcnt_clear(hand->kctx);

    return hand;

free_unmap:
    kbase_vunmap(hand->kctx, &hand->hwcnt_map);
free_buffer:
    kbase_mem_free(hand->kctx, hand->hwcnt_gpu_va);
free_layout:
    kfree(in_out_info->hwc_layout);
destroy_context:
    kbase_destroy_context(hand->kctx);
release_device:
    kbase_release_device(hand->kbdev);
free_hand:
    kfree(hand);

    return NULL;
}
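Callers such as the gator profiler module treat the returned pointer as an opaque handle: they fill in the four counter bitmasks, call init, drive dumps against info.kernel_dump_buffer, and finally terminate. Below is a minimal caller-side sketch; it assumes the companion kbase_gator_hwcnt_term() entry point that ships alongside this init function (its exact signature varies between driver releases), and the bitmask encoding noted in the comments is also an assumption.

/* Caller-side sketch; assumes kbase_gator_hwcnt_term(info, handles)
 * exists in this release. Dump request/complete calls are elided. */
static int gator_hwcnt_session_sketch(void)
{
    struct kbase_gator_hwcnt_info info = { 0 };
    struct kbase_gator_hwcnt_handles *hand;

    /* Enable all counters in each block type (assumed encoding:
     * one enable bit per group of four counters). */
    info.bitmask[0] = 0xffff; /* job manager */
    info.bitmask[1] = 0xffff; /* tiler */
    info.bitmask[2] = 0xffff; /* shader cores */
    info.bitmask[3] = 0xffff; /* MMU/L2 */

    hand = kbase_gator_hwcnt_init(&info);
    if (!hand)
        return -ENODEV;

    /* ... request a dump, then walk info.nr_hwc_blocks entries of
     * info.hwc_layout over info.kernel_dump_buffer ... */

    kbase_gator_hwcnt_term(&info, hand);
    return 0;
}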
mali_error kbasep_8401_workaround_init(kbase_device * const kbdev)
{
    kbasep_js_device_data *js_devdata;
    kbase_context *workaround_kctx;
    int i;
    u16 as_present_mask;

    KBASE_DEBUG_ASSERT(kbdev);
    KBASE_DEBUG_ASSERT(kbdev->workaround_kctx == NULL);

    js_devdata = &kbdev->js_data;

    /* For this workaround we reserve one address space to allow us to
     * submit a special job independent of other contexts */
    --(kbdev->nr_hw_address_spaces);

    /* Only update nr_user_address_spaces if it was unchanged - to ensure
     * HW workarounds that have modified this will still work */
    if (kbdev->nr_user_address_spaces == (kbdev->nr_hw_address_spaces + 1))
        --(kbdev->nr_user_address_spaces);

    KBASE_DEBUG_ASSERT(kbdev->nr_user_address_spaces <= kbdev->nr_hw_address_spaces);

    /* Recalculate the free address spaces bit-pattern */
    as_present_mask = (1U << kbdev->nr_hw_address_spaces) - 1;
    js_devdata->as_free &= as_present_mask;

    workaround_kctx = kbase_create_context(kbdev);
    if (!workaround_kctx)
        return MALI_ERROR_FUNCTION_FAILED;

    /* Allocate the pages required to contain the job */
    if (MALI_ERROR_NONE != kbase_mem_allocator_alloc(&workaround_kctx->osalloc,
            KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT,
            kbdev->workaround_compute_job_pa, 0))
        goto no_pages;

    /* Get virtual address of mapped memory and write a compute job for each page */
    for (i = 0; i < KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT; i++) {
        kbdev->workaround_compute_job_va[i] =
            kmap(pfn_to_page(PFN_DOWN(kbdev->workaround_compute_job_pa[i])));
        if (NULL == kbdev->workaround_compute_job_va[i])
            goto page_free;

        /* Generate the compute job data */
        kbasep_8401_workaround_update_job_pointers((u32 *) kbdev->workaround_compute_job_va[i], i);
    }

    /* Insert pages to the gpu mmu. */
    kbase_gpu_vm_lock(workaround_kctx);
    kbase_mmu_insert_pages(workaround_kctx,
                           /* vpfn = page number */
                           (u64) WORKAROUND_PAGE_OFFSET,
                           /* physical address */
                           kbdev->workaround_compute_job_pa,
                           /* number of pages */
                           KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT,
                           /* flags */
                           KBASE_REG_GPU_RD | KBASE_REG_CPU_RD | KBASE_REG_CPU_WR | KBASE_REG_GPU_WR);
    kbase_gpu_vm_unlock(workaround_kctx);

    kbdev->workaround_kctx = workaround_kctx;
    return MALI_ERROR_NONE;

page_free:
    while (i--)
        kunmap(pfn_to_page(PFN_DOWN(kbdev->workaround_compute_job_pa[i])));
    kbase_mem_allocator_free(&workaround_kctx->osalloc,
            KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT,
            kbdev->workaround_compute_job_pa, MALI_TRUE);
no_pages:
    kbase_destroy_context(workaround_kctx);

    return MALI_ERROR_FUNCTION_FAILED;
}
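This init acquires four things: one reserved address space, the compute-job pages, kernel mappings of those pages, and a dedicated context. A teardown must return them all. The following is a sketch of the inverse, under the assumption that the driver's real kbasep_8401_workaround_term() follows the usual mirror-image pattern; restoration of nr_user_address_spaces and the as_free mask is elided.

/* Hypothetical mirror of kbasep_8401_workaround_init(); the shipped
 * term routine may differ in detail (e.g. MMU teardown of the
 * inserted pages before the context is destroyed). */
void kbasep_8401_workaround_term_sketch(kbase_device *kbdev)
{
    int i;

    KBASE_DEBUG_ASSERT(kbdev);
    KBASE_DEBUG_ASSERT(kbdev->workaround_kctx != NULL);

    for (i = 0; i < KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT; i++)
        kunmap(pfn_to_page(PFN_DOWN(kbdev->workaround_compute_job_pa[i])));

    kbase_mem_allocator_free(&kbdev->workaround_kctx->osalloc,
            KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT,
            kbdev->workaround_compute_job_pa, MALI_TRUE);

    kbase_destroy_context(kbdev->workaround_kctx);
    kbdev->workaround_kctx = NULL;

    /* Hand the reserved address space back to the job scheduler. */
    ++(kbdev->nr_hw_address_spaces);
}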
struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
{
    struct kbase_gator_hwcnt_handles *hand;
    struct kbase_uk_hwcnt_setup setup;
    mali_error err;
    uint32_t dump_size = 0, i = 0;
    struct kbase_va_region *reg;
    u64 flags;
    u64 nr_pages;
    u16 va_alignment = 0;

    if (!in_out_info)
        return NULL;

    hand = kzalloc(sizeof(*hand), GFP_KERNEL);
    if (!hand)
        return NULL;

    /* Get the first device */
    hand->kbdev = kbase_find_device(-1);
    if (!hand->kbdev)
        goto free_hand;

    /* Create a kbase_context */
    hand->kctx = kbase_create_context(hand->kbdev, true);
    if (!hand->kctx)
        goto release_device;

    in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
    in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
    in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;

    /* If we are using a Mali-T6xx or Mali-T72x device */
    if (in_out_info->gpu_id == GPU_ID_PI_T60X ||
        in_out_info->gpu_id == GPU_ID_PI_T62X ||
        in_out_info->gpu_id == GPU_ID_PI_T72X) {
        uint32_t cg, j;
        uint64_t core_mask;

        /* There are 8 hardware counter blocks per core group */
        in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) *
                MALI_MAX_NUM_BLOCKS_PER_GROUP *
                in_out_info->nr_core_groups, GFP_KERNEL);
        if (!in_out_info->hwc_layout)
            goto destroy_context;

        dump_size = in_out_info->nr_core_groups *
            MALI_MAX_NUM_BLOCKS_PER_GROUP *
            MALI_COUNTERS_PER_BLOCK *
            MALI_BYTES_PER_COUNTER;

        for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
            core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;

            for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
                if (core_mask & (1u << j))
                    in_out_info->hwc_layout[i++] = SHADER_BLOCK;
                else
                    in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
            }

            in_out_info->hwc_layout[i++] = TILER_BLOCK;
            in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;

            /* There is no implementation with an L3 cache */
            in_out_info->hwc_layout[i++] = RESERVED_BLOCK;

            if (0 == cg)
                in_out_info->hwc_layout[i++] = JM_BLOCK;
            else
                in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
        }
    /* If we are using a Mali-T76x device */
    } else if ((in_out_info->gpu_id == GPU_ID_PI_T76X)
#ifdef MALI_INCLUDE_TFRX
               || (in_out_info->gpu_id == GPU_ID_PI_TFRX)
#endif /* MALI_INCLUDE_TFRX */
               || (in_out_info->gpu_id == GPU_ID_PI_T86X)
#ifdef MALI_INCLUDE_TGAL
               || (in_out_info->gpu_id == GPU_ID_PI_TGAL)
#endif
               ) {
        uint32_t nr_l2, nr_sc, j;
        uint64_t core_mask;

        nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;
        core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;
        nr_sc = hand->kbdev->gpu_props.props.coherency_info.group[0].num_cores;

        /* For Mali-T76x, the job manager and tiler sets of counters are always present */
        in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc + nr_l2), GFP_KERNEL);
        if (!in_out_info->hwc_layout)
            goto destroy_context;

        dump_size = (2 + nr_sc + nr_l2) *
            MALI_COUNTERS_PER_BLOCK *
            MALI_BYTES_PER_COUNTER;

        in_out_info->hwc_layout[i++] = JM_BLOCK;
        in_out_info->hwc_layout[i++] = TILER_BLOCK;

        for (j = 0; j < nr_l2; j++)
            in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;

        while (core_mask != 0ull) {
            if ((core_mask & 1ull) != 0ull)
                in_out_info->hwc_layout[i++] = SHADER_BLOCK;
            else
                in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
            core_mask >>= 1;
        }
    }
mali_error kbasep_8401_workaround_init(kbase_device *kbdev)
{
    kbasep_js_device_data *js_devdata;
    kbase_context *workaround_kctx;
    u32 count;
    int i;
    u16 as_present_mask;

    OSK_ASSERT(kbdev);
    OSK_ASSERT(kbdev->workaround_kctx == NULL);

    js_devdata = &kbdev->js_data;

    /* For this workaround we reserve one address space to allow us to
     * submit a special job independent of other contexts */
    --(kbdev->nr_hw_address_spaces);

    if (kbdev->nr_user_address_spaces == (kbdev->nr_hw_address_spaces + 1)) {
        /* Only update nr_user_address_spaces if it was unchanged - to ensure
         * HW workarounds that have modified this will still work */
        --(kbdev->nr_user_address_spaces);
    }
    OSK_ASSERT(kbdev->nr_user_address_spaces <= kbdev->nr_hw_address_spaces);

    /* Recalculate the free address spaces bit-pattern */
    as_present_mask = (1U << kbdev->nr_hw_address_spaces) - 1;
    js_devdata->as_free &= as_present_mask;

    workaround_kctx = kbase_create_context(kbdev);
    if (!workaround_kctx)
        return MALI_ERROR_FUNCTION_FAILED;

    /* Allocate the pages required to contain the job */
    count = kbase_phy_pages_alloc(workaround_kctx->kbdev, &workaround_kctx->pgd_allocator,
            KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT,
            kbdev->workaround_compute_job_pa);
    if (count < KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT)
        goto page_release;

    /* Get virtual address of mapped memory and write a compute job for each page */
    for (i = 0; i < KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT; i++) {
        kbdev->workaround_compute_job_va[i] = osk_kmap(kbdev->workaround_compute_job_pa[i]);
        if (NULL == kbdev->workaround_compute_job_va[i])
            goto page_free;

        /* Generate the compute job data */
        kbasep_8401_workaround_update_job_pointers((u32 *)kbdev->workaround_compute_job_va[i], i);
    }

    /* Insert pages to the gpu mmu. */
    kbase_mmu_insert_pages(workaround_kctx,
                           /* vpfn = page number */
                           (u64)WORKAROUND_PAGE_OFFSET,
                           /* physical address */
                           kbdev->workaround_compute_job_pa,
                           /* number of pages */
                           KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT,
                           /* flags */
                           KBASE_REG_GPU_RD | KBASE_REG_CPU_RD | KBASE_REG_CPU_WR | KBASE_REG_GPU_WR);

    kbdev->workaround_kctx = workaround_kctx;
    return MALI_ERROR_NONE;

page_free:
    while (i--)
        osk_kunmap(kbdev->workaround_compute_job_pa[i], kbdev->workaround_compute_job_va[i]);
page_release:
    kbase_phy_pages_free(kbdev, &workaround_kctx->pgd_allocator, count,
            kbdev->workaround_compute_job_pa);
    kbase_destroy_context(workaround_kctx);

    return MALI_ERROR_FUNCTION_FAILED;
}
mali_error kbasep_8401_workaround_init(kbase_device *kbdev)
{
    kbase_context *workaround_kctx;
    u32 count;
    int i;

    OSK_ASSERT(kbdev);
    OSK_ASSERT(kbdev->workaround_kctx == NULL);

    /* For this workaround we reserve one address space to allow us to
     * submit a special job independent of other contexts */
    kbdev->nr_address_spaces--;

    workaround_kctx = kbase_create_context(kbdev);
    if (!workaround_kctx)
        return MALI_ERROR_FUNCTION_FAILED;

    /* Allocate the pages required to contain the job */
    count = kbase_phy_pages_alloc(workaround_kctx->kbdev, &workaround_kctx->pgd_allocator,
            KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT,
            kbdev->workaround_compute_job_pa);
    if (count < KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT)
        goto page_release;

    /* Get virtual address of mapped memory and write a compute job for each page */
    for (i = 0; i < KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT; i++) {
        kbdev->workaround_compute_job_va[i] = osk_kmap(kbdev->workaround_compute_job_pa[i]);
        if (NULL == kbdev->workaround_compute_job_va[i])
            goto page_free;

        /* Generate the compute job data */
        kbasep_8401_workaround_update_job_pointers((u32 *)kbdev->workaround_compute_job_va[i], i);
    }

    /* Insert pages to the gpu mmu. */
    kbase_mmu_insert_pages(workaround_kctx,
                           /* vpfn = page number */
                           (u64)WORKAROUND_PAGE_OFFSET,
                           /* physical address */
                           kbdev->workaround_compute_job_pa,
                           /* number of pages */
                           KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT,
                           /* flags */
                           KBASE_REG_CPU_RW | KBASE_REG_GPU_RW);

    kbdev->workaround_kctx = workaround_kctx;
    return MALI_ERROR_NONE;

page_free:
    while (i--)
        osk_kunmap(kbdev->workaround_compute_job_pa[i], kbdev->workaround_compute_job_va[i]);
page_release:
    kbase_phy_pages_free(kbdev, &workaround_kctx->pgd_allocator, count,
            kbdev->workaround_compute_job_pa);
    kbase_destroy_context(workaround_kctx);

    return MALI_ERROR_FUNCTION_FAILED;
}
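Aside from the address-space bookkeeping and the OSK mapping helpers, the visible difference from the later revisions above is the MMU flag spelling: this version passes KBASE_REG_CPU_RW | KBASE_REG_GPU_RW where the newer code passes four split RD/WR bits. If the combined constants were simply unions of the split ones (an assumption; the listings themselves do not define them), the two forms request identical permissions:

/* Assumed relationship between the old combined access flags and the
 * newer split read/write bits; not confirmed by these listings. */
#define KBASE_REG_CPU_RW_ASSUMED (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR)
#define KBASE_REG_GPU_RW_ASSUMED (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR)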