/**
 * @brief Configure HW counters collection
 *
 * Dispatches to the enable or disable path depending on the dump buffer
 * address carried by @p setup: a non-zero address enables collection, a zero
 * address disables it.
 *
 * @param kctx   Context to configure; asserted non-NULL (as is its device)
 *               via KBASE_DEBUG_ASSERT.
 * @param setup  Counter setup request; a NULL pointer is rejected.
 *
 * @return MALI_ERROR_FUNCTION_FAILED when @p setup is NULL, otherwise the
 *         result of kbase_instr_hwcnt_enable() or kbase_instr_hwcnt_disable().
 */
mali_error kbase_instr_hwcnt_setup(kbase_context *kctx, kbase_uk_hwcnt_setup *setup)
{
	kbase_device *kbdev;

	KBASE_DEBUG_ASSERT(NULL != kctx);

	kbdev = kctx->kbdev;
	KBASE_DEBUG_ASSERT(NULL != kbdev);

	/* Bad parameter - nothing to configure */
	if (NULL == setup)
		return MALI_ERROR_FUNCTION_FAILED;

	/* A zero dump buffer address is the request to stop collecting */
	if (0ULL == setup->dump_buffer)
		return kbase_instr_hwcnt_disable(kctx);

	/* Any other address: start collecting into that buffer */
	return kbase_instr_hwcnt_enable(kctx, setup);
}
/**
 * @brief Set up hardware counter collection on the first kbase device
 *
 * Allocates a handle, looks up the first device, creates a context on it,
 * builds the counter block layout, allocates and maps a dump buffer, and
 * finally enables and clears the hardware counters.
 *
 * @param in_out_info  In: bitmask[0..3] supply the JM, tiler, shader and
 *                     MMU/L2 enable bitmasks, in that order.
 *                     Out: nr_cores, nr_core_groups, gpu_id, hwc_layout
 *                     (kmalloc'ed here), nr_hwc_blocks, size and
 *                     kernel_dump_buffer are filled in.
 *
 * @return The new handle on success, NULL on any failure. On failure every
 *         resource acquired here is released; a layout array allocated here
 *         is freed and in_out_info->hwc_layout is reset to NULL, and
 *         in_out_info->kernel_dump_buffer is reset to NULL if the buffer had
 *         already been published, so the caller is never left holding
 *         dangling pointers.
 */
struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
{
	struct kbase_gator_hwcnt_handles *hand;
	struct kbase_uk_hwcnt_setup setup;
	int err;
	uint32_t dump_size = 0, i = 0;
	struct kbase_va_region *reg;
	u64 flags;
	u64 nr_pages;
	u16 va_alignment = 0;

	if (!in_out_info)
		return NULL;

	/* Only some members of the request are assigned explicitly below;
	 * zero the rest so no stack garbage is handed to the enable call. */
	memset(&setup, 0, sizeof(setup));

	hand = kzalloc(sizeof(*hand), GFP_KERNEL);
	if (!hand)
		return NULL;

	/* Get the first device */
	hand->kbdev = kbase_find_device(-1);
	if (!hand->kbdev)
		goto free_hand;

	/* Create a kbase_context */
	hand->kctx = kbase_create_context(hand->kbdev, true);
	if (!hand->kctx)
		goto release_device;

	in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
	in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
	in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;

	if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) {
		/* v4 device (Mali-T6xx or Mali-T72x): fixed-size layout of
		 * MALI_MAX_NUM_BLOCKS_PER_GROUP blocks per core group */
		uint32_t cg, j;
		uint64_t core_mask;

		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) *
			MALI_MAX_NUM_BLOCKS_PER_GROUP *
			in_out_info->nr_core_groups, GFP_KERNEL);

		if (!in_out_info->hwc_layout)
			goto destroy_context;

		dump_size = in_out_info->nr_core_groups *
			MALI_MAX_NUM_BLOCKS_PER_GROUP *
			MALI_COUNTERS_PER_BLOCK *
			MALI_BYTES_PER_COUNTER;

		for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
			core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;

			/* One slot per possible core: shader block if the core
			 * is present in the mask, reserved otherwise */
			for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
				if (core_mask & (1ull << j))
					in_out_info->hwc_layout[i++] = SHADER_BLOCK;
				else
					in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
			}

			in_out_info->hwc_layout[i++] = TILER_BLOCK;
			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;

			in_out_info->hwc_layout[i++] = RESERVED_BLOCK;

			/* Only the first core group gets a job manager block */
			if (0 == cg)
				in_out_info->hwc_layout[i++] = JM_BLOCK;
			else
				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
		}
	} else {
		/* Any other device: JM, tiler, one block per L2 slice, then
		 * one block per bit of the group 0 core mask */
		uint32_t nr_l2, nr_sc_bits, j;
		uint64_t core_mask;

		nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;

		core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;

		/* Position of the highest set bit == number of shader slots
		 * the loop below will emit */
		nr_sc_bits = fls64(core_mask);

		/* The job manager and tiler sets of counters
		 * are always present */
		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL);

		if (!in_out_info->hwc_layout)
			goto destroy_context;

		dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;

		in_out_info->hwc_layout[i++] = JM_BLOCK;
		in_out_info->hwc_layout[i++] = TILER_BLOCK;

		for (j = 0; j < nr_l2; j++)
			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;

		while (core_mask != 0ull) {
			if ((core_mask & 1ull) != 0ull)
				in_out_info->hwc_layout[i++] = SHADER_BLOCK;
			else
				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
			core_mask >>= 1;
		}
	}

	in_out_info->nr_hwc_blocks = i;

	in_out_info->size = dump_size;

	/* Dump buffer: CPU read/write, GPU write, sized in whole pages */
	flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR;
	nr_pages = PFN_UP(dump_size);
	reg = kbase_mem_alloc(hand->kctx, nr_pages, nr_pages, 0,
			&flags, &hand->hwcnt_gpu_va, &va_alignment);
	if (!reg)
		goto free_layout;

	hand->hwcnt_cpu_va = kbase_vmap(hand->kctx, hand->hwcnt_gpu_va,
			dump_size, &hand->hwcnt_map);

	if (!hand->hwcnt_cpu_va)
		goto free_buffer;

	in_out_info->kernel_dump_buffer = hand->hwcnt_cpu_va;
	memset(in_out_info->kernel_dump_buffer, 0, nr_pages * PAGE_SIZE);

	/* The request carries the GPU virtual address of the buffer, not the
	 * kernel CPU mapping published in kernel_dump_buffer. */
	setup.dump_buffer = hand->hwcnt_gpu_va;
	setup.jm_bm = in_out_info->bitmask[0];
	setup.tiler_bm = in_out_info->bitmask[1];
	setup.shader_bm = in_out_info->bitmask[2];
	setup.mmu_l2_bm = in_out_info->bitmask[3];

	err = kbase_instr_hwcnt_enable(hand->kctx, &setup);
	if (err)
		goto free_unmap;

	kbase_instr_hwcnt_clear(hand->kctx);

	return hand;

	/* Error unwinding: each label releases one resource and falls through
	 * to the labels for everything acquired before it. */
free_unmap:
	kbase_vunmap(hand->kctx, &hand->hwcnt_map);
	/* The mapping is gone; do not leave the caller pointing at it */
	in_out_info->kernel_dump_buffer = NULL;

free_buffer:
	kbase_mem_free(hand->kctx, hand->hwcnt_gpu_va);

free_layout:
	kfree(in_out_info->hwc_layout);
	/* Guard the caller against a double free / use after free */
	in_out_info->hwc_layout = NULL;

destroy_context:
	kbase_destroy_context(hand->kctx);

release_device:
	kbase_release_device(hand->kbdev);

free_hand:
	kfree(hand);

	return NULL;
}