Beispiel #1
0
/** Compute the dot product between one vector of this object and one vector
 * of another dense feature object.
 *
 * @param vec_idx1 index of the vector taken from this object
 * @param df other feature object; must be non-NULL and report the same
 *           feature type and feature class as this object
 * @param vec_idx2 index of the vector taken from df
 * @return dot product of the two vectors; both vectors must have the same
 *         length
 */
template<class ST> float64_t CDenseFeatures<ST>::dot(int32_t vec_idx1, CDotFeatures* df,
		int32_t vec_idx2)
{
	ASSERT(df);
	ASSERT(df->get_feature_type() == get_feature_type());
	ASSERT(df->get_feature_class() == get_feature_class());
	CDenseFeatures<ST>* sf = (CDenseFeatures<ST>*) df;

	int32_t len1, len2;
	bool free1, free2;

	ST* vec1 = get_feature_vector(vec_idx1, len1, free1);
	ST* vec2 = sf->get_feature_vector(vec_idx2, len2, free2);

	// The product runs over len1 entries of BOTH buffers, so it is only
	// evaluated when the lengths agree; otherwise vec2 could be read past
	// its end.
	const bool same_length = (len1 == len2);
	float64_t result = 0;
	if (same_length)
		result = SGVector<ST>::dot(vec1, vec2, len1);

	// Hand both vectors back before checking the precondition so that a
	// failing check cannot skip the release calls.
	free_feature_vector(vec1, vec_idx1, free1);
	sf->free_feature_vector(vec2, vec_idx2, free2);

	ASSERT(same_length);

	return result;
}
Beispiel #2
0
/** Run k-means clustering on the lhs features of the configured distance.
 *
 * On return the member `mus` holds the k cluster centers (center c occupies
 * the `dimensions` consecutive entries starting at mus.matrix[c*dimensions])
 * and the member `R` holds one value per cluster, computed as
 * 0.7*sqrt(rmin1)+0.3*sqrt(rmin2) from squared distances to other centers.
 *
 * While running, the rhs of `distance` is temporarily replaced by a feature
 * object carrying the current centers; the previous rhs is put back before
 * returning.
 *
 * @param use_old_mus if false, every pattern is first assigned to a random
 *                    cluster; if true, patterns are first assigned to the
 *                    nearest of the centers given in mus_start
 * @param mus_start   initial centers, read as a dimensions x k matrix; must be
 *                    non-NULL when use_old_mus is true
 */
void CKMeans::clustknb(bool use_old_mus, float64_t *mus_start)
{
	ASSERT(distance && distance->get_feature_type()==F_DREAL);
	/* checked up front, before any buffer is allocated and before the rhs of
	 * the distance is swapped out */
	ASSERT(!use_old_mus || mus_start);

	CDenseFeatures<float64_t>* lhs = (CDenseFeatures<float64_t>*) distance->get_lhs();
	ASSERT(lhs && lhs->get_num_features()>0 && lhs->get_num_vectors()>0);

	int32_t XSize=lhs->get_num_vectors();
	dimensions=lhs->get_num_features();
	int32_t i, changed=1;
	const int32_t XDimk=dimensions*k;
	int32_t iter=0;

	R=SGVector<float64_t>(k);

	mus=SGMatrix<float64_t>(dimensions, k);

	/* ClList: cluster index per pattern, weights_set: summed weight per
	 * cluster, dists: k distances per pattern for the initial assignment
	 * (only the first k entries are reused as scratch in the main loop) */
	int32_t *ClList=SG_CALLOC(int32_t, XSize);
	float64_t *weights_set=SG_CALLOC(float64_t, k);
	float64_t *dists=SG_CALLOC(float64_t, k*XSize);

	///replace rhs feature vectors
	CDenseFeatures<float64_t>* rhs_mus = new CDenseFeatures<float64_t>(0);
	CFeatures* rhs_cache = distance->replace_rhs(rhs_mus);

	int32_t vlen=0;
	bool vfree=false;
	float64_t* vec=NULL;

	/* ClList=zeros(XSize,1) ; */
	memset(ClList, 0, sizeof(int32_t)*XSize);
	/* weights_set=zeros(k,1) ; */
	memset(weights_set, 0, sizeof(float64_t)*k);

	/* cluster_centers=zeros(dimensions, k) ; */
	memset(mus.matrix, 0, sizeof(float64_t)*XDimk);

	if (!use_old_mus)
	{
		/* random initial assignment; accumulate weighted sums per cluster */
		for (i=0; i<XSize; i++)
		{
			const int32_t Cl=CMath::random(0, k-1);
			int32_t j;
			float64_t weight=Weights.vector[i];

			weights_set[Cl]+=weight;
			ClList[i]=Cl;

			vec=lhs->get_feature_vector(i, vlen, vfree);

			for (j=0; j<dimensions; j++)
				mus.matrix[Cl*dimensions+j] += weight*vec[j];

			lhs->free_feature_vector(vec, i, vfree);
		}
		/* turn the weighted sums into weighted means */
		for (i=0; i<k; i++)
		{
			int32_t j;

			if (weights_set[i]!=0.0)
				for (j=0; j<dimensions; j++)
					mus.matrix[i*dimensions+j] /= weights_set[i];
		}
	}
	else
	{
		/// set rhs to mus_start
		rhs_mus->copy_feature_matrix(SGMatrix<float64_t>(mus_start,dimensions,k));
		float64_t* p_dists=dists;

		/* presumably fills p_dists[0..k) with the distances from pattern idx
		 * to each of the k rhs vectors -- helper is defined elsewhere */
		for(int32_t idx=0;idx<XSize;idx++,p_dists+=k)
			distances_rhs(p_dists,0,k,idx);
		p_dists=NULL;

		/* assign every pattern to the center with the smallest distance */
		for (i=0; i<XSize; i++)
		{
			float64_t mini=dists[i*k];
			int32_t Cl = 0, j;

			for (j=1; j<k; j++)
			{
				if (dists[i*k+j]<mini)
				{
					Cl=j;
					mini=dists[i*k+j];
				}
			}
			ClList[i]=Cl;
		}

		/* Compute the sum of all points belonging to a cluster
		 * and count the points */
		for (i=0; i<XSize; i++)
		{
			const int32_t Cl = ClList[i];
			float64_t weight=Weights.vector[i];
			weights_set[Cl]+=weight;
#ifndef MUSRECALC
			vec=lhs->get_feature_vector(i, vlen, vfree);

			/* loop index declared here: no j is in scope at this point */
			for (int32_t j=0; j<dimensions; j++)
				mus.matrix[Cl*dimensions+j] += weight*vec[j];

			lhs->free_feature_vector(vec, i, vfree);
#endif
		}
#ifndef MUSRECALC
		/* normalization to get the mean */
		for (i=0; i<k; i++)
		{
			if (weights_set[i]!=0.0)
			{
				int32_t j;
				for (j=0; j<dimensions; j++)
					mus.matrix[i*dimensions+j] /= weights_set[i];
			}
		}
#endif
	}



	while (changed && (iter<max_iter))
	{
		iter++;
		if (iter==max_iter-1)
			SG_WARNING("kmeans clustering changed throughout %d iterations stopping...\n", max_iter-1);

		if (iter%1000 == 0)
			SG_INFO("Iteration[%d/%d]: Assignment of %i patterns changed.\n", iter, max_iter, changed);
		changed=0;

#ifdef MUSRECALC
		/* mus=zeros(dimensions, k) ; */
		memset(mus.matrix, 0, sizeof(float64_t)*XDimk);

		for (i=0; i<XSize; i++)
		{
			int32_t j;
			int32_t Cl=ClList[i];
			float64_t weight=Weights.vector[i];

			vec=lhs->get_feature_vector(i, vlen, vfree);

			for (j=0; j<dimensions; j++)
				mus.matrix[Cl*dimensions+j] += weight*vec[j];

			lhs->free_feature_vector(vec, i, vfree);
		}
		for (i=0; i<k; i++)
		{
			int32_t j;

			if (weights_set[i]!=0.0)
				for (j=0; j<dimensions; j++)
					mus.matrix[i*dimensions+j] /= weights_set[i];
		}
#endif
		///update rhs
		rhs_mus->copy_feature_matrix(mus);

		/* XSize draws with replacement: a pattern may be visited several
		 * times or not at all within one pass */
		for (i=0; i<XSize; i++)
		{
			/* ks=ceil(rand(1,XSize)*XSize) ; */
			const int32_t Pat= CMath::random(0, XSize-1);
			const int32_t ClList_Pat=ClList[Pat];
			int32_t imini, j;
			float64_t mini, weight;

			weight=Weights.vector[Pat];

			/* compute the distance of this point to all centers */
			for(int32_t idx_k=0;idx_k<k;idx_k++)
				dists[idx_k]=distance->distance(Pat,idx_k);

			/* [mini,imini]=min(dists(:,i)) ; */
			imini=0 ; mini=dists[0];
			for (j=1; j<k; j++)
				if (dists[j]<mini)
				{
					mini=dists[j];
					imini=j;
				}

			if (imini!=ClList_Pat)
			{
				changed= changed + 1;

				/* weights_set(imini) = weights_set(imini) + weight ; */
				weights_set[imini]+= weight;
				/* weights_set(j)     = weights_set(j)     - weight ; */
				weights_set[ClList_Pat]-= weight;

				/* the same pattern vector feeds both center updates below */
				vec=lhs->get_feature_vector(Pat, vlen, vfree);

				/* receiving cluster: mu_new = mu_old + (x - mu_old)*w/W_new,
				 * i.e. the center moves TOWARDS the newly added point */
				for (j=0; j<dimensions; j++)
				{
					mus.matrix[imini*dimensions+j]+=(vec[j]
							-mus.matrix[imini*dimensions+j])
							*(weight/weights_set[imini]);
				}

				/* losing cluster: mu_new = mu_old - (x - mu_old)*w/W_new */
				/* if weights_set(j)~=0 */
				if (weights_set[ClList_Pat]!=0.0)
				{
					for (j=0; j<dimensions; j++)
					{
						mus.matrix[ClList_Pat*dimensions+j]-=
								(vec[j]
										-mus.matrix[ClList_Pat
												*dimensions+j])
										*(weight/weights_set[ClList_Pat]);
					}
				}
				else
				{
					/*  mus(:,j)=zeros(dimensions,1) ; */
					for (j=0; j<dimensions; j++)
						mus.matrix[ClList_Pat*dimensions+j]=0;
				}

				lhs->free_feature_vector(vec, Pat, vfree);

				/* ClList(i)= imini ; */
				ClList[Pat] = imini;
			}
		}
	}

	/* compute the ,,variances'' of the clusters */
	for (i=0; i<k; i++)
	{
		float64_t rmin1=0;
		float64_t rmin2=0;

		bool first_round=true;

		for (int32_t j=0; j<k; j++)
		{
			if (j!=i)
			{
				int32_t l;
				float64_t dist = 0;

				/* squared Euclidean distance between centers i and j */
				for (l=0; l<dimensions; l++)
				{
					dist+=CMath::sq(
							mus.matrix[i*dimensions+l]
									-mus.matrix[j*dimensions+l]);
				}

				if (first_round)
				{
					rmin1=dist;
					rmin2=dist;
					first_round=false;
				}
				else
				{
					/* NOTE(review): when the first distance seen is already
					 * the smallest, rmin2 stays equal to rmin1 instead of
					 * becoming the second-smallest value -- confirm whether
					 * this is intended */
					if ((dist<rmin2) && (dist>=rmin1))
						rmin2=dist;

					if (dist<rmin1)
					{
						rmin2=rmin1;
						rmin1=dist;
					}
				}
			}
		}

		R.vector[i]=(0.7*CMath::sqrt(rmin1)+0.3*CMath::sqrt(rmin2));
	}

	/* put the caller's rhs back and release everything acquired above */
	distance->replace_rhs(rhs_cache);
	delete rhs_mus;
	SG_FREE(ClList);
	SG_FREE(weights_set);
	SG_FREE(dists);
	SG_UNREF(lhs);
}