Пример #1
0
// Clusters `points` into `k` groups by iterating assignment and center
// re-estimation, starting from the caller-supplied `centers`.
//
// Each pass assigns every point to the center with the smallest value of
// s_distanceSquared() (ties go to the lowest-indexed center, because only a
// strictly smaller distance replaces the current best), then replaces each
// center that received at least one point with s_calcCenter() of its members.
// The loop stops once a pass produces exactly the same assignment as the
// previous one. There is no cap on the number of passes.
//
// Parameters:
//   points  - the data points; must be non-empty, and every point must have
//             the same non-zero number of components.
//   k       - the number of clusters; must be non-zero.
//   centers - in: the k initial centers, each with the same number of
//             components as the points. out: the centers as last
//             recomputed. A center that received no points in a pass keeps
//             its previous value.
//
// Returns: a vector of k clusters; entry c holds the indices (into `points`)
// of the points assigned to center c, in increasing index order. A cluster
// may be empty.
//
// Throws: eInvalidArgument if any of the requirements above is violated.
vector< vector<u32> > kmeans(const vector< vector<f64> >& points, u32 k,
                             vector< vector<f64> >& centers)
{
    if (points.empty())
        throw eInvalidArgument("There must be points to cluster!");

    const size_t dimensions = points[0].size();
    if (dimensions == 0)
        throw eInvalidArgument("There must be at least one dimension!");
    for (size_t i = 1; i < points.size(); i++)
    {
        if (points[i].size() != dimensions)
            throw eInvalidArgument("All data points must have the same dimensionality.");
    }

    if (k == 0)
        throw eInvalidArgument("Clustering into zero clusters makes no sense.");

    // Compare at full width: casting the size down to u32 first could make an
    // oversized `centers` vector appear to have exactly k entries.
    if (centers.size() != static_cast<size_t>(k))
        throw eInvalidArgument("You must supply k initial centers to this version of kmeans().");
    for (u32 c = 0; c < k; c++)
    {
        if (centers[c].size() != dimensions)
            throw eInvalidArgument("All initial centers must have the same dimensionality as the data points.");
    }

    vector< vector<u32> > clusters(k);

    while (true)
    {
        vector< vector<u32> > newClusters(k);

        // Assignment step: attach every point to its closest center.
        for (size_t i = 0; i < points.size(); i++)
        {
            f64 dist = s_distanceSquared(points[i], centers[0]);
            u32 closest = 0;
            for (u32 c = 1; c < k; c++)
            {
                f64 distHere = s_distanceSquared(points[i], centers[c]);
                if (distHere < dist)
                {
                    closest = c;
                    dist = distHere;
                }
            }
            newClusters[closest].push_back(static_cast<u32>(i));
        }

        // Update step: recompute the center of every non-empty cluster.
        for (u32 c = 0; c < k; c++)
        {
            if (!newClusters[c].empty())
                centers[c] = s_calcCenter(points, newClusters[c]);
            // An empty cluster keeps its old center. Open question from the
            // original author: leave it, or randomize a new center?
        }

        // Converged: this pass reproduced the previous assignment.
        if (clusters == newClusters)
            break;

        // newClusters is discarded at the end of this iteration, so take over
        // its storage instead of deep-copying every index list.
        clusters.swap(newClusters);
    }

    return clusters;
}
Пример #2
0
// Builds the algorithm state from `para`.
//
// _parameters is initialized from `para`, and _bestClusters is constructed
// from para.N, para.atomTypes and para.atomNumber. The constructor then
// appends para._numberOfClusters entries to each of _X, _V and _U. On every
// iteration one new DEBaseClusters is constructed from the same three
// arguments and pushed onto all three members, so entries at the same
// position start out as copies of one object.
// NOTE(review): assumes _X, _V and _U are value containers whose push_back
// copies its argument -- confirm against the class declaration.
DEBaseAlgorithm::DEBaseAlgorithm(DEBaseParameters& para)
	: _parameters(para)
	, _bestClusters(para.N, para.atomTypes, para.atomNumber)
{
	for (int member = 0; member < para._numberOfClusters; ++member)
	{
		// Constructed anew on each pass rather than hoisted, in case the
		// DEBaseClusters constructor does work that must differ per entry.
		DEBaseClusters seed(para.N, para.atomTypes, para.atomNumber);
		_X.push_back(seed);
		_V.push_back(seed);
		_U.push_back(seed);
	}
}
Пример #3
0
// Zhu et al. "A Rank-Order Distance based Clustering Algorithm for Face Tagging", CVPR 2011
br::Clusters br::ClusterGallery(const QStringList &simmats, float aggressiveness, const QString &csv)
{
    qDebug("Clustering %d simmat(s)", simmats.size());

    // Read in gallery parts, keeping top neighbors of each template
    Neighborhood neighborhood = getNeighborhood(simmats);
    const int cutoff = neighborhood.first().size();
    const float threshold = 3*cutoff/4 * aggressiveness/5;

    // Initialize clusters
    Clusters clusters(neighborhood.size());
    for (int i=0; i<neighborhood.size(); i++)
        clusters[i].append(i);

    bool done = false;
    while (!done) {
        // nextClusterIds[i] = j means that cluster i is set to merge into cluster j
        QVector<int> nextClusterIDs(neighborhood.size());
        for (int i=0; i<neighborhood.size(); i++) nextClusterIDs[i] = i;

        // For each cluster
        for (int clusterID=0; clusterID<neighborhood.size(); clusterID++) {
            const Neighbors &neighbors = neighborhood[clusterID];
            int nextClusterID = nextClusterIDs[clusterID];

            // Check its neighbors
            foreach (const Neighbor &neighbor, neighbors) {
                int neighborID = neighbor.first;
                int nextNeighborID = nextClusterIDs[neighborID];

                // Don't bother if they have already merged
                if (nextNeighborID == nextClusterID) continue;

                // Flag for merge if similar enough
                if (normalizedROD(neighborhood, clusterID, neighborID) < threshold) {
                    if (nextClusterID < nextNeighborID) nextClusterIDs[neighborID] = nextClusterID;
                    else                                nextClusterIDs[clusterID] = nextNeighborID;
                }
            }
        }

        // Transitive merge
        for (int i=0; i<neighborhood.size(); i++) {
            int nextClusterID = i;
            while (nextClusterID != nextClusterIDs[nextClusterID]) {
                assert(nextClusterIDs[nextClusterID] < nextClusterID);
                nextClusterID = nextClusterIDs[nextClusterID];
            }
            nextClusterIDs[i] = nextClusterID;
        }

        // Construct new clusters
        QHash<int, int> clusterIDLUT;
        QList<int> allClusterIDs = QSet<int>::fromList(nextClusterIDs.toList()).values();
        for (int i=0; i<neighborhood.size(); i++)
            clusterIDLUT[i] = allClusterIDs.indexOf(nextClusterIDs[i]);

        Clusters newClusters(allClusterIDs.size());
        Neighborhood newNeighborhood(allClusterIDs.size());

        for (int i=0; i<neighborhood.size(); i++) {
            int newID = clusterIDLUT[i];
            newClusters[newID].append(clusters[i]);
            newNeighborhood[newID].append(neighborhood[i]);
        }

        // Update indices and trim
        for (int i=0; i<newNeighborhood.size(); i++) {
            Neighbors &neighbors = newNeighborhood[i];
            int size = qMin(neighbors.size(),cutoff);
            std::partial_sort(neighbors.begin(), neighbors.begin()+size, neighbors.end(), compareNeighbors);
            for (int j=0; j<size; j++)
                neighbors[j].first = clusterIDLUT[j];
            neighbors = neighbors.mid(0, cutoff);
        }

        // Update results
        done = true; //(newClusters.size() >= clusters.size());
        clusters = newClusters;
        neighborhood = newNeighborhood;
    }