Beispiel #1
0
void Kmeans::Generel_K_Means(Matrix matrix)
{
	//Kmeans will only work with its assigned values, whereas gmeans can modify kmeans.
	int n_Iters, i, j;
	bool no_assignment_change;

	n_Iters = 0;
	no_assignment_change = true;

	//DO while of the algorithm to assign to new cluster is closer, update the centroid.
	//Before we get here we will have used Well_Seperated_Centroids which will be the initial partition of data
	do
	{
		pre_Result = result;
		n_Iters++;
		//We estimate its a stable cluster if it can be finished within 5 steps
		if (n_Iters >EST_START)
			stabilized = true;

		//If there is no new assignment we have finished clustering.
		if (Assign_Cluster(matrix, stabilized) == 0){}
		else
		{
			/*There was an assignment thus we have to compute new centroids and update the size of each cluster,
			*as values will be assigned to other clusters*/
			no_assignment_change = false;
			
			Compute_Cluster_Size();

			//If unstable copy concept_vectors onto old_conceptVectors
			if (n_Iters >= EST_START)
				for (i = 0; i < n_Clusters; i++)
					for (j = 0; j < row; j++)
						old_ConceptVectors[i][j] = concept_Vectors[i][j];

			//Update Centroids based on the new clusters
			Update_Centroids(matrix);

			//Average vector for all clusters (concept_vectors now contain the average vector of each cluster)
			for (i = 0; i < n_Clusters; i++)
				average_vec(concept_Vectors[i], row, clusterSize[i]);
			//normal Vector gets the squared average vector
			for (i = 0; i < n_Clusters; i++)
				normal_ConceptVectors[i] = norm_2(concept_Vectors[i], row);
			//if unstable run
			if (n_Iters >= EST_START)
			{
				//Calculate the difference between the previous average vector versus the new vector
				for (i = 0; i < n_Clusters; i++)
				{
					difference[i] = 0.0;
					for (j = 0; j < row; j++)
						difference[i] += (old_ConceptVectors[i][j] - concept_Vectors[i][j]) * (old_ConceptVectors[i][j] - concept_Vectors[i][j]);
				}

				//returning distance between the squared average vector and the average vector to sim_mat
				if (n_Iters > EST_START)
					for (i = 0; i<col; i++)
						sim_Mat[cluster[i]][i] = matrix.Euc_Dis(concept_Vectors[cluster[i]], i, normal_ConceptVectors[cluster[i]]);
				else
					for (i = 0; i < n_Clusters; i++)
						matrix.Euc_Dis(concept_Vectors[i], normal_ConceptVectors[i], sim_Mat[i]);
			}
			else
				for (i = 0; i < n_Clusters; i++)//returning distance between the squared average vector and the average vector to sim_mat
					matrix.Euc_Dis(concept_Vectors[i], normal_ConceptVectors[i], sim_Mat[i]);

			//initialize cluster_quality
			for (i = 0; i<n_Clusters; i++)
				cluster_quality[i] = 0.0;

			//update the cluster quality based on the collected simulation matrix
			for (i = 0; i < col; i++)
			{
				cluster_quality[cluster[i]] += sim_Mat[cluster[i]][i];
			}
			//Coherence is based on the total cluster quality
			result = Coherence(n_Clusters);

			std::cout << "E";
		}//epsilon is a user defined function default set to 0.0001, initial_obj_fun_val is defined by the initial partioning.
	} while ((pre_Result - result) > epsilon*initial_obj_fun_val);
	std::cout << std::endl;

	//if the kmeans has run and it was stabil we retrieve the euclidean distance of concept_vectors and the normal_ConceptVectors
	if (stabilized)
		for (i = 0; i < n_Clusters; i++)
			matrix.Euc_Dis(concept_Vectors[i], normal_ConceptVectors[i], sim_Mat[i]);
	//if it required to assign new changes it will update quility change matrix
	if ((!no_assignment_change) && (f_v_times >0))
		for (i = 0; i<n_Clusters; i++)
			Update_Quality_Change_Mat(matrix, i);

}
Beispiel #2
0
//Initialize the Concept Vectors
void Kmeans::Initialize_CV(Matrix matrix)
{

	int i, j, k;

	Well_Separated_Centroids(matrix);

	// reset concept vectors

	for (i = 0; i < n_Clusters; i++)
		for (j = 0; j < row; j++)
			concept_Vectors[i][j] = 0.0;
	for (i = 0; i < col; i++)
	{
		//add current concept_vector to the original vector
		if ((cluster[i] >= 0) && (cluster[i] < n_Clusters))
			matrix.Ith_Add_CV(i, concept_Vectors[cluster[i]]);
		else
			cluster[i] = 0;
	}
	//compute how many points belongs to the different cluster
	Compute_Cluster_Size();

	//average vector for each cluster
	for (i = 0; i < n_Clusters; i++)
		average_vec(concept_Vectors[i], row, clusterSize[i]);

	//Compute the normal vector for n_clusters number of vectors = ^2 
	for (i = 0; i < n_Clusters; i++)
		normal_ConceptVectors[i] = norm_2(concept_Vectors[i], row);

	//calculate the distance from the concept vectors to the normal vectors
	for (i = 0; i < n_Clusters; i++)
		matrix.Euc_Dis(concept_Vectors[i], normal_ConceptVectors[i], sim_Mat[i]);

	//Init Cluster quality which is the distance between normal CV and concept_vectors
	for (i = 0; i<n_Clusters; i++)
		cluster_quality[i] = 0.0;
	k = 0;
	for (i = 0; i < col; i++)
	{
		cluster_quality[cluster[i]] += sim_Mat[cluster[i]][i];
	}
	//for (i = 0; i < n_Clusters; i++)
	//diff[i] = 0.0;

	//A random constant figured out based on cluster_quality
	initial_obj_fun_val = result = Coherence(n_Clusters);
	fv_threshold = -1.0*initial_obj_fun_val*delta;

	if (f_v_times >0)
	{
		//init and use quality change matrix (used to change quality Matrix)
		quality_change_mat = new double *[n_Clusters];
		// VT 2009-11-28
		for (int j = 0; j < n_Clusters; j++)
			quality_change_mat[j] = new double[col];

		for (i = 0; i < n_Clusters; i++)
			Update_Quality_Change_Mat(matrix, i);
	}
}
Beispiel #3
0
//Centroids should be separated
void Kmeans::Well_Separated_Centroids(Matrix matrix)
{
	int i, j, k, min_ind, *cv = new int[n_Clusters];
	double min, cos_sum;
	bool *mark = new bool[col];

	for (i = 0; i< col; i++)
		mark[i] = false;


	for (i = 0; i < n_Clusters; i++)
		for (j = 0; j < row; j++)
			concept_Vectors[i][j] = 0.0;

	//Random vectors generation
	do{
		cv[0] = rand_gen.GetUniformInt(col);
	} while (mark[cv[0]]);

	//add current concept_vector to the original vector
	matrix.Ith_Add_CV(cv[0], concept_Vectors[0]);
	mark[cv[0]] = true;

	//get normal CV
	normal_ConceptVectors[0] = matrix.GetNorm(cv[0]);
	//Euclidean Distance between the vectors
	matrix.Euc_Dis(concept_Vectors[0], normal_ConceptVectors[0], sim_Mat[0]);

	//Create random concept vectors (roughly in the same area)
	for (i = 1; i<n_Clusters; i++)
	{
		min_ind = 0;
		min = 0.0;
		for (j = 0; j<col; j++)
		{
			if (!mark[j])
			{
				cos_sum = 0.0;
				for (k = 0; k<i; k++)
					cos_sum += sim_Mat[k][j];
				if (cos_sum > min)
				{
					min = cos_sum;
					min_ind = j;
				}
			}
		}
		cv[i] = min_ind;
		matrix.Ith_Add_CV(cv[i], concept_Vectors[i]);

		normal_ConceptVectors[i] = matrix.GetNorm(cv[i]);
		matrix.Euc_Dis(concept_Vectors[i], normal_ConceptVectors[i], sim_Mat[i]);
		mark[cv[i]] = true;
	}

	//assign the points to clusters
	for (i = 0; i<col; i++)
		cluster[i] = 0;
	Assign_Cluster(matrix, false);

	delete[] cv;
	delete[] mark;
}
Beispiel #4
0
int Kmeans::Assign_Cluster(Matrix matrix, bool stabilized)
{

	int i, j, multi = 0, changed = 0, temp_Cluster_ID;
	double temp_sim;

	//if stabil (run less than 
	if (stabilized)
	{
		//Init the distance based on old distances
		for (i = 0; i < n_Clusters; i++)
			for (j = 0; j < col; j++)
				if (i != cluster[j])
					sim_Mat[i][j] += difference[i] - 2 * sqrt(difference[i] * sim_Mat[i][j]);

		for (i = 0; i < col; i++)
		{
			temp_sim = sim_Mat[cluster[i]][i];
			temp_Cluster_ID = cluster[i];

			for (j = 0; j < n_Clusters; j++)
				if (j != cluster[i])
				{
					//if current placement is furthere away than new possible placement, assign new cluster if new one is closer
					if (sim_Mat[j][i] < temp_sim)
					{
						multi++;
						//recalculate the new vector based on new formula
						sim_Mat[j][i] = matrix.Euc_Dis(concept_Vectors[j], i, normal_ConceptVectors[j]);
						//if current placement is furthere away than new possible placement, assign new cluster if new one is closer
						if (sim_Mat[j][i] < temp_sim)
						{
							temp_sim = sim_Mat[j][i];
							temp_Cluster_ID = j;
						}
					}
				}
			//Assign new cluster if closer than previous cluster
			if (temp_Cluster_ID != cluster[i])
			{
				cluster[i] = temp_Cluster_ID;
				sim_Mat[cluster[i]][i] = temp_sim;
				changed++;
			}
		}
	}
	//if unstable 
	else
	{
		for (i = 0; i < col; i++)
		{
			temp_sim = sim_Mat[cluster[i]][i];
			temp_Cluster_ID = cluster[i];

			for (j = 0; j < n_Clusters; j++)
				//if point does not belong to cluster do
				if (j != cluster[i])
				{
					multi++;
					//if current placement is furthere away than new possible placement, assign new cluster if new one is closer
					if (sim_Mat[j][i] < temp_sim)
					{
						temp_sim = sim_Mat[j][i];
						temp_Cluster_ID = j;
					}
				}
			//Assign new cluster if closer than previous cluster
			if (temp_Cluster_ID != cluster[i])
			{
				cluster[i] = temp_Cluster_ID;
				sim_Mat[cluster[i]][i] = temp_sim;
				changed++;
			}
		}
	}
	return changed;
}