示例#1
0
/*
k-means center initialization using the following algorithm:
Arthur & Vassilvitskii (2007) k-means++: The Advantages of Careful Seeding
*/
static void generateCentersPP(const Mat& _data, Mat& _out_centers,
                              int K, RNG& rng, int trials)
{
    int i, j, k, dims = _data.cols, N = _data.rows;
    const float* data = _data.ptr<float>(0);
    size_t step = _data.step/sizeof(data[0]);
    std::vector<int> _centers(K);
    int* centers = &_centers[0];
    std::vector<float> _dist(N*3);
    float* dist = &_dist[0], *tdist = dist + N, *tdist2 = tdist + N;
    double sum0 = 0;

    centers[0] = (unsigned)rng % N;

    for( i = 0; i < N; i++ )
    {
        dist[i] = normL2Sqr(data + step*i, data + step*centers[0], dims);
        sum0 += dist[i];
    }

    for( k = 1; k < K; k++ )
    {
        double bestSum = DBL_MAX;
        int bestCenter = -1;

        for( j = 0; j < trials; j++ )
        {
            double p = (double)rng*sum0, s = 0;
            for( i = 0; i < N-1; i++ )
                if( (p -= dist[i]) <= 0 )
                    break;
            int ci = i;

            parallel_for_(Range(0, N),
                          KMeansPPDistanceComputer(tdist2, data, dist, dims, step, step*ci));
            for( i = 0; i < N; i++ )
            {
                s += tdist2[i];
            }

            if( s < bestSum )
            {
                bestSum = s;
                bestCenter = ci;
                std::swap(tdist, tdist2);
            }
        }
        centers[k] = bestCenter;
        sum0 = bestSum;
        std::swap(dist, tdist);
    }

    for( k = 0; k < K; k++ )
    {
        const float* src = data + step*centers[k];
        float* dst = _out_centers.ptr<float>(k);
        for( j = 0; j < dims; j++ )
            dst[j] = src[j];
    }
}
/**
 * Restores this encoder's settings from an XML element.
 *
 * Child elements of `root` that are read:
 *   - <k>       : integer text is stored in _k.
 *   - <pca>     : its presence sets _usePCA; integer text is stored in _pcaDim.
 *   - <centers> : holds <center> children, each with one row of
 *                 space-separated values. The first row fixes the column
 *                 count; at most _k rows are read into _centers, and a
 *                 search tree is then built over _centers.
 *
 * If <centers> is absent or has no <center> child, the center data, _tree and
 * _computed are left untouched. _computed is set only after the centers have
 * been parsed and the tree has been created.
 *
 * @param root element whose children carry the serialized settings; it is
 *             dereferenced without a null check.
 * @throws boost::bad_lexical_cast if a value cannot be converted to Scalar or
 *         a row has fewer values than the first row.
 */
void HistEncoding::deserializeSpec(const tinyxml2::XMLElement* root)
{
    if (const tinyxml2::XMLElement *k = root->FirstChildElement("k"))
    {
        // A failed conversion leaves _k unchanged; the status is not checked.
        k->QueryIntText(&_k);
    }

    if (const tinyxml2::XMLElement *pca = root->FirstChildElement("pca"))
    {
        _usePCA = true;
        pca->QueryIntText(&_pcaDim);
    }

    const tinyxml2::XMLElement *centers = root->FirstChildElement("centers");
    if (centers == NULL)
    {
        return;
    }

    const tinyxml2::XMLElement *center = centers->FirstChildElement("center");
    if (center == NULL)
    {
        // An empty <centers> element carries nothing to restore.
        return;
    }

    // GetText() returns null for an element without text; building a
    // std::string from a null pointer is undefined, so use "" instead.
    const char *text = center->GetText();
    std::vector<std::string> elems = stringSplit(std::string(text ? text : ""), ' ');

    // The first row decides how many columns every row must provide.
    const size_t cols = elems.size();
    _centers = SampleType(_k, cols);
    // NOTE(review): looks like leftover debug output; kept so the observable
    // behavior of the function does not change.
    std::cout << elems.size() << std::endl;

    // Rows beyond _k are ignored because _centers only has _k rows. If fewer
    // than _k rows are present, the remaining rows are not assigned here.
    int row = 0;
    while (center != NULL && row < _k)
    {
        if (row > 0)
        {
            text = center->GetText();
            elems = stringSplit(std::string(text ? text : ""), ' ');
        }

        // A short row would otherwise be indexed past its end. Report it with
        // the same exception type an unparseable value produces.
        if (elems.size() < cols)
        {
            throw boost::bad_lexical_cast();
        }

        for (size_t i = 0; i < cols; ++i)
        {
            _centers(row, i) = boost::lexical_cast<Scalar>(elems[i]);
        }

        center = center->NextSiblingElement("center");
        ++row;
    }

    // NOTE(review): any tree previously held by _tree is not released here;
    // confirm who owns it if this function can run more than once.
    _tree = new tet::classification::util::ANNTree<Scalar>(_centers);
    _computed = true;
}