Пример #1
0
void kmedoids_allocator::allocate(const std::size_t p_amount, const dataset & p_data, cluster_sequence & p_clusters) {
    medoid_sequence initial_medoids;
    kmeans_plus_plus(p_amount).initialize(p_data, initial_medoids);

    kmedoids_data result;
    kmedoids(initial_medoids).process(p_data, result);

    p_clusters = std::move(result.clusters());
}
void AgglomerativeClustering::startClustering(const ClusterMethodParameters* pParameters, Document *pDocument, ClusteringResult *pClusteringResult)
{
	mpDocument = pDocument;
	mpClusteringResult = pClusteringResult;
	// preprocess glyphs or compute features, depending on distance type:
	if (pParameters->dataType == FEATURES_BASED) {
		pDocument->computeFeatures();
	}
	else {
		pDocument->preprocessAllGlyphs();
	}
//	this->mpDocument->setDistanceType(pParameters->dataType);

	// retrieve parameters:
	const AgglomerativeParameters *pParams = (const AgglomerativeParameters*)(pParameters);

	// print some debug info:
	std::cout << "Starting agglomerative clustering algorithm..." << std::endl;
	std::cout << "nr of clusters: " << pParams->nClusters << std::endl;
	std::cout << "feature dist type: " << pParams->featureDistType << std::endl;
	std::cout << "cluster dist type: " << pParams->clusterDistType << std::endl;

	mpClusteringResult->deleteClustering(); // delete probably old clustering result
//	setClusterDistanceTypeFunctionPointer(pParams->clusterDistType);
	const int nSamples = this->mpDocument->nParsedImages();
	if (nSamples < 2) {
		throw NoDataException("No data found for clustering!");
	}
	if (pParameters->dataType == FEATURES_BASED && this->mpDocument->nFeatures() < 2) {
		throw NoDataException("No features found for clustering!");
	}

	StopWatch watch;
	// initialize distance matrix:
	std::cout << "computing distance matrix..." << std::endl;
	watch.start();
	initDistanceMatrix();
	watch.stop();
	std::cout << "successfully computed distance matrix" << std::endl;

	// TEST: USING CLUSTER LIBRARY:
	double **tmpdistmatrix;

	// Allocate memory for distance matrix
	tmpdistmatrix = new double*[nSamples];
	for (int i = 0; i < nSamples; ++i)
		tmpdistmatrix[i] = new double[nSamples];
	// copy distance matrix
	for (int i=0;i<nSamples;++i){
		for (int j=0; j<=i; ++j) {
			tmpdistmatrix[i][j] = mDistMat(i,j);
			tmpdistmatrix[j][i] = mDistMat(i,j);
		}
	}
	std::vector<int> labels(nSamples);
#if 0
	int npass = 100;
	double error;
	int ifound;
	watch.start();
	kmedoids (pParams->nClusters, nSamples, tmpdistmatrix,
			npass, &labels[0], &error, &ifound);
	watch.stop();
	std::cout << "finished kmedioids, error = " << error << ", ifound = " << ifound << std::endl;

#else
	// method = 's' (single-linkage), 'm' (complete-linkage), 'a' (average-linkage) or 'c' (centroid-linkage):
	char methodChar='a';
	switch (pParams->clusterDistType) {
		case AVG_DIST:
			methodChar='a';
			break;
		case MIN_DIST:
			methodChar='s';
			break;
		case MAX_DIST:
			methodChar='m';
			break;
		default:
			throw Exception("Unknown distance type in AgglomerativeClustering::startClustering()");
			break;
	} // end switch

	Node* tree = treecluster(nSamples, 1, NULL, NULL, NULL, 0, '_', methodChar, tmpdistmatrix);
	if (tree==NULL)
		std::cerr << "FATAL ERROR - NULL POINTER IN CLUSTER RESULT!" << std::endl;

	// cut hierarchical cluster tree at specified nr of clusters:
	cuttree(nSamples, tree, pParams->nClusters, &labels[0]);
	delete [] tree;
#endif

	// De-Allocate memory for temporary distance matrix:
	for (int i = 0; i < nSamples; ++i)
		delete [] tmpdistmatrix[i];
	delete [] tmpdistmatrix;
	std::cout << "finished agglo clustering with cluster libarary!!" << std::endl;
	pClusteringResult->createClusteringResultFromLabelVector(labels, pDocument);
	/////////////////// END TEST

#if 0
	// get pointer to image chars:
	std::vector<ImageChar*> *imageCharVecPointer = this->mpDocument->getImageCharsVecPointer();

	// create cluster for each instance:
	CharCluster *pCluster=NULL;
	for (int i=0; i<nSamples; ++i) {
		pCluster = mpClusteringResult->addEmptyCluster(this->mpDocument);
		pCluster->addChar( (*imageCharVecPointer)[i] );
	}


	std::cout << "Starting merging process..." << std::endl;
//	watch.start();
	std::vector<float> minValVec;
	std::vector<int> nClustsVec;
	// while nr of clusters not reached -> merge two nearest clusters
//	while (mClusterVec.size() > pParams->nClusters) {
	while (mpClusteringResult->nClusters() > pParams->nClusters) {
		// find min element of distance matrix:
		minValVec.push_back(mMinDist);
//		nClustsVec.push_back(mClusterVec.size());
		nClustsVec.push_back(mpClusteringResult->nClusters());
//		std::cout << "nr of clusters is " << mClusterVec.size() << std::endl;
//		std::cout << "min dist is " << mMinDist << " on index " << mMinIndex << std::endl;
		watch.start();
		updateClusterLabels();
		std::cout << "updated cluster labels, time = " << watch.stop(false) << std::endl;

		watch.start();
		findMinDist();
		std::cout << "found min dist, time = " << watch.stop(false) << std::endl;
		std::cout << "new nr of clusters is " << mpClusteringResult->nClusters() << std::endl;
	} // end while
//	writeTxtFile(nClustsVec, "C:/projekte/impact/matlab_sebastian/c_prog_out/n_clusts_vec.txt");
//	writeTxtFile(minValVec, "C:/projekte/impact/matlab_sebastian/c_prog_out/min_val_vec.txt");
//	watch.stop();
#endif

	pDocument->clearAllPreprocessing();
	pDocument->clearFeatures();
	pClusteringResult->computePrototypeFeatures();
	pClusteringResult->updatePrototypes(true);

	return;
} // end startClustering()