/** Compute the kernel value of feature vector idx_a with itself.
 *
 * A histogram of the 16-bit symbols occurring in the vector is built in
 * dict_diagonal_optimization. The return value is then
 *  - the number of distinct symbols that occur, if use_sign is set, or
 *  - the sum of the squared occurrence counts otherwise.
 *
 * Requires lhs and rhs to be the same feature object, the vector to be
 * shorter than 2^16 entries and the number of symbols to fit into
 * dictionary_size (all enforced with ASSERT).
 *
 * @param idx_a index of the feature vector in lhs
 * @return the diagonal value for vector idx_a as described above
 */
float64_t CCommWordStringKernel::compute_diag(int32_t idx_a)
{
	CStringFeatures<uint16_t>* l = (CStringFeatures<uint16_t>*) lhs;

	// Preconditions that do not depend on the vector itself are checked
	// first, so nothing has been acquired yet if one of them fails.
	ASSERT(l==(CStringFeatures<uint16_t>*) rhs)
	ASSERT(sizeof(uint16_t)<=sizeof(float64_t))

	const int32_t num_symbols=(int32_t) l->get_num_symbols();
	ASSERT(num_symbols<=dictionary_size)

	int32_t alen;
	bool free_av;
	uint16_t* av=l->get_feature_vector(idx_a, alen, free_av);

	// The vector length must be below 2^16.
	ASSERT((1<<(sizeof(uint16_t)*8)) > alen)

	// Reset the first num_symbols histogram slots, then count occurrences.
	// NOTE(review): symbols >= num_symbols would hit slots that are neither
	// cleared here nor summed below - presumably they cannot occur; confirm.
	int32_t* dic = dict_diagonal_optimization;
	memset(dic, 0, num_symbols*sizeof(int32_t));

	for (int32_t i=0; i<alen; i++)
		dic[av[i]]++;

	float64_t result=0.0;

	if (use_sign)
	{
		// Only presence matters: count the distinct symbols.
		for (int32_t i=0; i<num_symbols; i++)
		{
			if (dic[i]!=0)
				result++;
		}
	}
	else
	{
		for (int32_t i=0; i<num_symbols; i++)
		{
			if (dic[i]!=0)
			{
				// Square in floating point: a count may be as large as
				// 65535, and its square does not fit into int32_t.
				const float64_t count=static_cast<float64_t>(dic[i]);
				result+=count*count;
			}
		}
	}

	l->free_feature_vector(av, idx_a, free_av);

	return result;
}