void kbase_gpuprops_set(kbase_device *kbdev)
{
	kbase_gpu_props *gpu_props;
	struct midg_raw_gpu_props *raw;

	KBASE_DEBUG_ASSERT(NULL != kbdev);
	gpu_props = &kbdev->gpu_props;
	raw = &gpu_props->props.raw_props;

	/* Initialize the base_gpu_props structure from the hardware */
	kbase_gpuprops_get_props(&gpu_props->props, kbdev);

	/* Populate the derived properties */
	kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);

	/* Populate kbase-only fields */
	gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
	gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);

	gpu_props->l3_props.associativity = KBASE_UBFX32(raw->l3_features, 8U, 8);
	gpu_props->l3_props.external_bus_width = KBASE_UBFX32(raw->l3_features, 24U, 8);

	gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
	gpu_props->mem.supergroup = KBASE_UBFX32(raw->mem_features, 1U, 1);

	gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
	gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);

	gpu_props->num_cores = hweight64(raw->shader_present);
	gpu_props->num_core_groups = hweight64(raw->l2_present);
	gpu_props->num_supergroups = hweight64(raw->l3_present);
	gpu_props->num_address_spaces = hweight32(raw->as_present);
	gpu_props->num_job_slots = hweight32(raw->js_present);
}
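
KBASE_UBFX32(value, offset, size) is not defined in this excerpt; a minimal sketch of the unsigned bitfield extract its name suggests (an assumption based on the name, not the driver's actual macro):

#include <stdint.h>

/* hypothetical stand-in for KBASE_UBFX32: take `size` bits of `value`
 * starting at bit `offset` */
static inline uint32_t ubfx32(uint32_t value, uint32_t offset, uint32_t size)
{
	return (value >> offset) & (uint32_t)((1ULL << size) - 1);
}

Read that way, l2_props.associativity above is bits [15:8] of l2_features, and external_bus_width is bits [31:24].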
Example #2
/* Count the number of free inodes. */
static unsigned int
xchk_iallocbt_freecount(
	xfs_inofree_t			freemask)
{
	BUILD_BUG_ON(sizeof(freemask) != sizeof(__u64));
	return hweight64(freemask);
}
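
Every example on this page hinges on hweight64() returning the Hamming weight (population count) of a 64-bit word, so this function is just counting free inodes as set bits in the mask. A portable sketch in the spirit of the kernel's generic software fallback (real kernels may use a hardware popcount instruction instead):

#include <stdint.h>

static inline unsigned int sw_hweight64(uint64_t w)
{
	/* SWAR reduction: per-2-bit counts, then per-4-bit, then per-byte */
	w -= (w >> 1) & 0x5555555555555555ULL;
	w = (w & 0x3333333333333333ULL) + ((w >> 2) & 0x3333333333333333ULL);
	w = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
	/* the multiply sums all eight byte counts into the top byte */
	return (w * 0x0101010101010101ULL) >> 56;
}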
Example #3
static int init_cgr(struct device *qidev)
{
	int ret;
	struct qm_mcc_initcgr opts;
	const u64 cpus = *(u64 *)qman_affine_cpus();
	const int num_cpus = hweight64(cpus);
	const u64 val = num_cpus * MAX_RSP_FQ_BACKLOG_PER_CPU;

	ret = qman_alloc_cgrid(&qipriv.cgr.cgrid);
	if (ret) {
		dev_err(qidev, "CGR alloc failed for rsp FQs: %d\n", ret);
		return ret;
	}

	qipriv.cgr.cb = cgr_cb;
	memset(&opts, 0, sizeof(opts));
	opts.we_mask = cpu_to_be16(QM_CGR_WE_CSCN_EN | QM_CGR_WE_CS_THRES |
				   QM_CGR_WE_MODE);
	opts.cgr.cscn_en = QM_CGR_EN;
	opts.cgr.mode = QMAN_CGR_MODE_FRAME;
	qm_cgr_cs_thres_set64(&opts.cgr.cs_thres, val, 1);

	ret = qman_create_cgr(&qipriv.cgr, QMAN_CGR_FLAG_USE_INIT, &opts);
	if (ret) {
		dev_err(qidev, "Error %d creating CAAM CGRID: %u\n", ret,
			qipriv.cgr.cgrid);
		return ret;
	}

	dev_info(qidev, "Congestion threshold set to %llu\n", val);
	return 0;
}
Example #4
void hweight64_test()
{
	for (int i = 0; i < 100000; ++i) {
		uint64_t r = RAND_NR_NEXT(u, v, w);
		assert(__builtin_popcountll(r) == hweight64(r));
	}
}
Example #5
File: bit.cpp Project: 0xCAB/ulib
int main()
{
	uint64_t u, v, w;
	uint64_t seed = time(0);

	RAND_NR_INIT(u, v, w, seed);

	for (int i = 0; i < 100; i++)
		printf("rand number = %llx\n", (unsigned long long)RAND_NR_NEXT(u, v, w));

	uint64_t r = RAND_NR_NEXT(u, v, w);
	uint64_t s = BIN_TO_GRAYCODE(r);
	uint64_t t = BIN_TO_GRAYCODE(r + 1);
	assert(hweight64(t ^ s) == 1);
	GRAYCODE_TO_BIN64(s);

	assert(rev8(5) == 160);
	assert(rev8_hakmem(5) == 160);

	uint64_t hi, lo;
	MULQ(0x1234567887654321ul, 0x77665544332211fful, lo, hi);
	assert(hi == 611815671993850618UL);
	assert(lo == 14353276178066116319UL);

	if (s != r)
		fprintf(stderr, "expected %016llx, actual %016llx\n",
			(unsigned long long)r,
			(unsigned long long)s);
	else
		printf("passed\n");

	return 0;
}
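
The assertion hweight64(t ^ s) == 1 checks the defining property of a Gray code: consecutive codewords differ in exactly one bit. A self-contained sketch of the standard conversions that BIN_TO_GRAYCODE and GRAYCODE_TO_BIN64 presumably implement (assumed from the names, not taken from ulib's source):

#include <assert.h>
#include <stdint.h>

static uint64_t bin_to_gray(uint64_t n)
{
	return n ^ (n >> 1);	/* reflected binary Gray code */
}

static uint64_t gray_to_bin(uint64_t g)
{
	/* inverse: XOR-fold the higher bits down */
	g ^= g >> 32; g ^= g >> 16; g ^= g >> 8;
	g ^= g >> 4;  g ^= g >> 2;  g ^= g >> 1;
	return g;
}

int main(void)
{
	for (uint64_t n = 0; n < (1u << 20); n++) {
		/* adjacent codes differ in exactly one bit */
		assert(__builtin_popcountll(bin_to_gray(n + 1) ^ bin_to_gray(n)) == 1);
		assert(gray_to_bin(bin_to_gray(n)) == n);
	}
	return 0;
}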
Example #6
unsigned int hweight64_time()
{
	unsigned int r = 0;

	for (uint64_t i = 0; i < 100000000; ++i)
		r += hweight64(i);
	return r;
}
Example #7
static int gru_seq_show(struct seq_file *file, void *data)
{
	long gid = *(long *)data, ctxfree, cbrfree, dsrfree;
	struct gru_state *gru = GID_TO_GRU(gid);

	if (gid == 0) {
		seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "gid", "nid",
			   "ctx", "cbr", "dsr", "ctx", "cbr", "dsr");
		seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "", "", "busy",
			   "busy", "busy", "free", "free", "free");
	}
	if (gru) {
		ctxfree = GRU_NUM_CCH - gru->gs_active_contexts;
		cbrfree = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
		dsrfree = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
		seq_printf(file, " %5d%5d%7ld%6ld%6ld%8ld%6ld%6ld\n",
			   gru->gs_gid, gru->gs_blade_id, GRU_NUM_CCH - ctxfree,
			   GRU_NUM_CBE - cbrfree, GRU_NUM_DSR_BYTES - dsrfree,
			   ctxfree, cbrfree, dsrfree);
	}

	return 0;
}
Example #8
static int alloc_cgrs(struct device *qidev)
{
	struct qm_mcc_initcgr opts;
	int ret;
	const u64 cpus = *(u64 *)qman_affine_cpus();
	const int num_cpus = hweight64(cpus);
	u64 val;

	/*Allocate response CGR*/
	ret = qman_alloc_cgrid(&qipriv.rsp_cgr.cgrid);
	if (ret) {
		dev_err(qidev, "CGR alloc failed for rsp FQs");
		return ret;
	}

	qipriv.rsp_cgr.cb = rsp_cgr_cb;
	memset(&opts, 0, sizeof(opts));
	opts.we_mask = QM_CGR_WE_CSCN_EN | QM_CGR_WE_CS_THRES |
			QM_CGR_WE_MODE;
	opts.cgr.cscn_en = QM_CGR_EN;
	opts.cgr.mode = QMAN_CGR_MODE_FRAME;
#ifdef CONFIG_FSL_DPAA_ETH
	/*
	 * This effectively sets the to-CPU threshold equal to half of the
	 * number of buffers available to dpa_eth driver. It means that at most
	 * half of the buffers can be in the queues from SEC, waiting
	 * to be transmitted to the core (and then on the TX queues).
	 * NOTE: This is an arbitrary division; the factor '2' below could
	 *       also be '3' or '4'. It also depends on the number of devices
	 *       using the dpa_eth buffers (which can be >1 if, for instance,
	 *       PME/DCE are also used).
	 */
	val = num_cpus * CONFIG_FSL_DPAA_ETH_MAX_BUF_COUNT / 2;
#else
	val = num_cpus * MAX_RSP_FQ_BACKLOG_PER_CPU;
#endif
	qm_cgr_cs_thres_set64(&opts.cgr.cs_thres, val, 1);

	ret = qman_create_cgr(&qipriv.rsp_cgr,
				QMAN_CGR_FLAG_USE_INIT, &opts);
	if (ret) {
		dev_err(qidev, "Error %d creating CAAM rsp CGRID: %u\n",
			ret, qipriv.rsp_cgr.cgrid);
		return ret;
	}
#ifdef DEBUG
	dev_info(qidev, "CAAM to CPU threshold set to %llu\n", val);
#endif
	return 0;
}
Example #9
File: verbs.c Project: 020gzh/linux
/*
 * Advance the clean counter.  When the clean period has expired,
 * clean an entry.
 *
 * This is implemented in atomics to avoid locking.  Because multiple
 * variables are involved, it can be racy which can lead to slightly
 * inaccurate information.  Since this is only a heuristic, this is
 * OK.  Any inaccuracies will clean themselves out as the counter
 * advances.  That said, it is unlikely the entry clean operation will
 * race - the next possible racer will not start until the next clean
 * period.
 *
 * The clean counter is implemented as a decrement to zero.  When zero
 * is reached an entry is cleaned.
 */
static void wss_advance_clean_counter(void)
{
	int entry;
	int weight;
	unsigned long bits;

	/* become the cleaner if we decrement the counter to zero */
	if (atomic_dec_and_test(&wss.clean_counter)) {
		/*
		 * Set, not add, the clean period.  This avoids an issue
		 * where the counter could decrement below the clean period.
		 * Doing a set can result in lost decrements, slowing the
		 * clean advance.  Since this a heuristic, this possible
		 * slowdown is OK.
		 *
		 * An alternative is to loop, advancing the counter by a
		 * clean period until the result is > 0. However, this could
		 * lead to several threads keeping another in the clean loop.
		 * This could be mitigated by limiting the number of times
		 * we stay in the loop.
		 */
		atomic_set(&wss.clean_counter, wss_clean_period);

		/*
		 * Uniquely grab the entry to clean and move to next.
		 * The current entry is always the lower bits of
		 * wss.clean_entry.  The table size, wss.num_entries,
		 * is always a power-of-2.
		 */
		entry = (atomic_inc_return(&wss.clean_entry) - 1)
			& (wss.num_entries - 1);

		/* clear the entry and count the bits */
		bits = xchg(&wss.entries[entry], 0);
		weight = hweight64((u64)bits);
		/* only adjust the contended total count if needed */
		if (weight)
			atomic_sub(weight, &wss.total_count);
	}
}
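
The entry index computation works because wss.num_entries is a power of two, which makes the AND with (num_entries - 1) a cheap modulo. A standalone check of that identity:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	const uint32_t num_entries = 64;	/* must be a power of two */

	for (uint32_t counter = 1; counter < 100000; counter++)
		assert(((counter - 1) & (num_entries - 1)) ==
		       (counter - 1) % num_entries);
	return 0;
}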
Example #10
static int
default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp)
{
	pfm_default_smpl_hdr_t *hdr;
	pfm_default_smpl_entry_t *ent;
	void *cur, *last;
	unsigned long *e, entry_size;
	unsigned int npmds, i;
	unsigned char ovfl_pmd;
	unsigned char ovfl_notify;

	if (unlikely(buf == NULL || arg == NULL || regs == NULL || task == NULL)) {
		DPRINT(("[%d] invalid arguments buf=%p arg=%p\n", task->pid, buf, arg));
		return -EINVAL;
	}

	hdr         = (pfm_default_smpl_hdr_t *)buf;
	cur         = buf+hdr->hdr_cur_offs;
	last        = buf+hdr->hdr_buf_size;
	ovfl_pmd    = arg->ovfl_pmd;
	ovfl_notify = arg->ovfl_notify;

	/*
	 * precheck for sanity
	 */
	if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;

	npmds = hweight64(arg->smpl_pmds[0]);

	ent = (pfm_default_smpl_entry_t *)cur;

	prefetch(arg->smpl_pmds_values);

	entry_size = sizeof(*ent) + (npmds << 3);

	/* position for first pmd */
	e = (unsigned long *)(ent+1);

	hdr->hdr_count++;

	DPRINT_ovfl(("[%d] count=%lu cur=%p last=%p free_bytes=%lu ovfl_pmd=%d ovfl_notify=%d npmds=%u\n",
			task->pid,
			hdr->hdr_count,
			cur, last,
			last-cur,
			ovfl_pmd,
			ovfl_notify, npmds));

	/*
	 * current = task running at the time of the overflow.
	 *
	 * per-task mode:
	 * 	- this is usually the task being monitored.
	 * 	  Under certain conditions, it might be a different task
	 *
	 * system-wide:
	 * 	- this is not necessarily the task controlling the session
	 */
	ent->pid            = current->pid;
	ent->ovfl_pmd  	    = ovfl_pmd;
	ent->last_reset_val = arg->pmd_last_reset; //pmd[0].reg_last_reset_val;

	/*
	 * where did the fault happen (includes slot number)
	 */
	ent->ip = regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3);

	ent->tstamp    = stamp;
	ent->cpu       = smp_processor_id();
	ent->set       = arg->active_set;
	ent->tgid      = current->tgid;

	/*
	 * selectively store PMDs in increasing index number
	 */
	if (npmds) {
		unsigned long *val = arg->smpl_pmds_values;
		for(i=0; i < npmds; i++) {
			*e++ = *val++;
		}
	}

	/*
	 * update position for next entry
	 */
	hdr->hdr_cur_offs += entry_size;
	cur               += entry_size;

	/*
	 * post check to avoid losing the last sample
	 */
	if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;

	/*
	 * keep same ovfl_pmds, ovfl_notify
	 */
	arg->ovfl_ctrl.bits.notify_user     = 0;
	arg->ovfl_ctrl.bits.block_task      = 0;
	arg->ovfl_ctrl.bits.mask_monitoring = 0;
	arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; /* reset before returning from interrupt handler */

	return 0;
full:
	DPRINT_ovfl(("sampling buffer full free=%lu, count=%lu, ovfl_notify=%d\n", last-cur, hdr->hdr_count, ovfl_notify));

	/*
	 * increment number of buffer overflow.
	 * important to detect duplicate set of samples.
	 */
	hdr->hdr_overflows++;

	/*
	 * if no notification requested, then we saturate the buffer
	 */
	if (ovfl_notify == 0) {
		arg->ovfl_ctrl.bits.notify_user     = 0;
		arg->ovfl_ctrl.bits.block_task      = 0;
		arg->ovfl_ctrl.bits.mask_monitoring = 1;
		arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0;
	} else {
		arg->ovfl_ctrl.bits.notify_user     = 1;
		arg->ovfl_ctrl.bits.block_task      = 1; /* ignored for non-blocking context */
		arg->ovfl_ctrl.bits.mask_monitoring = 1;
		arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; /* no reset now */
	}
	return -1; /* we are full, sorry */
}
Example #11
STATIC void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
{
	struct mali_base_gpu_coherent_group *current_group;
	u64 group_present;
	u64 group_mask;
	u64 first_set, first_set_prev;
	u32 num_groups = 0;

	KBASE_DEBUG_ASSERT(NULL != props);

	props->coherency_info.coherency = props->raw_props.mem_features;
	props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);

	if (props->coherency_info.coherency & GROUPS_L3_COHERENT) {
		/* Group is l3 coherent */
		group_present = props->raw_props.l3_present;
	} else if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
		/* Group is l2 coherent */
		group_present = props->raw_props.l2_present;
	} else {
		/* Group is l1 coherent */
		group_present = props->raw_props.shader_present;
	}

	/*
	 * The coherent group mask can be computed from the l2/l3 present
	 * register.
	 *
	 * For the coherent group n:
	 * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
	 * where first_set is group_present with only its nth set-bit kept
	 * (i.e. the position from where a new group starts).
	 *
	 * For instance if the groups are l2 coherent and l2_present=0x0..01111:
	 * The first mask is:
	 * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
	 *               = (0x0..010     - 1) & ~(0x0..01      - 1)
	 *               =  0x0..00f
	 * The second mask is:
	 * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
	 *               = (0x0..100     - 1) & ~(0x0..010     - 1)
	 *               =  0x0..0f0
	 * And so on until all the bits from group_present have been cleared
	 * (i.e. there is no group left).
	 */

	current_group = props->coherency_info.group;
	first_set = group_present & ~(group_present - 1);

	while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
		group_present -= first_set;	/* Clear the current group bit */
		first_set_prev = first_set;

		first_set = group_present & ~(group_present - 1);
		group_mask = (first_set - 1) & ~(first_set_prev - 1);

		/* Populate the coherent_group structure for each group */
		current_group->core_mask = group_mask & props->raw_props.shader_present;
		current_group->num_cores = hweight64(current_group->core_mask);

		num_groups++;
		current_group++;
	}

	if (group_present != 0)
		KBASE_DEBUG_PRINT_WARN(KBASE_CORE, "Too many coherent groups (keeping only %d groups).", BASE_MAX_COHERENT_GROUPS);

	props->coherency_info.num_groups = num_groups;
}
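
The mask construction can be exercised outside the driver. A minimal standalone demo of the lowest-set-bit trick (x & ~(x - 1)) and the contiguous group masks it produces, using the l2_present = 0x1111 layout from the comment above:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t group_present = 0x1111;	/* groups start at bits 0, 4, 8, 12 */
	uint64_t first_set = group_present & ~(group_present - 1);

	while (group_present != 0) {
		uint64_t first_set_prev;

		group_present -= first_set;	/* clear the current group bit */
		first_set_prev = first_set;
		first_set = group_present & ~(group_present - 1);
		/* mask covering the span between consecutive group start bits */
		printf("group mask = %#llx\n", (unsigned long long)
		       ((first_set - 1) & ~(first_set_prev - 1)));
	}
	return 0;	/* prints 0xf, 0xf0, 0xf00, then an open-ended top mask */
}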