Esempio n. 1
0
// Obsolete - use *_EvaluateWithTestSet instead.
//
// Classifies the patterns in pp with nn (passing fws, k and d straight through
// to KNN_classifyToCategories) and returns the fraction of the resulting
// categories that FeatureWeights_areFriends() matches with the category at the
// same 1-based position in c.
//
// c is indexed up to the number of classified items, so it must hold at least
// that many entries.  Exceptions raised by the classifier propagate to the
// caller unchanged.
double FeatureWeights_evaluate
(
    FeatureWeights fws,     // weights to evaluate
    KNN nn,                 // classifier
    PatternList pp,         // test pattern
    Categories c,           // test categories
    long k,                 // k(!)
    int d                   // distance weighting
)

{
	autoCategories predicted = KNN_classifyToCategories (nn, pp, fws, k, d);

	long matches = 0;
	for (long i = 1; i <= predicted->size; ++ i) {
		if (FeatureWeights_areFriends (predicted->at [i], c->at [i]))
			++ matches;
	}

	// Floating-point division on purpose: the cast applies to the numerator
	// before the division takes place.
	return (double) matches / predicted->size;
}
// Partitions the rows of p into k clusters and returns one category per row.
//
// Procedure, as implemented below:
//   1. k distinct rows of p are drawn at random and copied into `centroids`.
//   2. `knn` is trained (KNN_learn) on the centroids together with the
//      sequential `categories`, and every row of p is classified with a single
//      neighbour.  Each centroid is then replaced by the running mean of the
//      rows that received its category.  Step 2 repeats until no centroid
//      coordinate changes (delta == 0).
//   3. The ratio smallest/largest cluster size is compared with s.  While it
//      is below s and reseeds remain (m > 0), everything restarts from step 1.
//
// Special case: if k equals the number of rows of p, the sequential categories
// are returned immediately.
//
// Precondition (asserted): in all other cases 1 <= k < p->ny.  Without it the
// code would divide by zero (k == 0) or loop forever looking for more distinct
// seed rows than p contains (k > p->ny).
//
// Exceptions thrown by the helpers propagate to the caller.
autoCategories PatternList_to_Categories_cluster
(
    ///////////////////////////////
    // Parameters                //
    ///////////////////////////////

    PatternList p,          // source
    //
    FeatureWeights fws,     // feature weights
    //
    long k,                 // k(!): requested number of clusters
    //
    double s,               // clustersize constraint 0 < s <= 1
    //
    long m                  // reseed maximum
    //
)

{
    autoCategories categories = Categories_createWithSequentialNumbers (k);
    if (k == p->ny)
        return categories;

    // Reject cluster counts the algorithm below cannot handle (see header).
    Melder_assert (k >= 1 && k < p -> ny);

    autoKNN knn = KNN_create();

    // When the rows cannot be split evenly, the most balanced split has
    // clusters of floor(ny/k) and floor(ny/k)+1 rows, so no min/max size ratio
    // above floor(ny/k) / (floor(ny/k)+1) is reachable; cap s at that value.
    if (p -> ny % k)
        if (s > (double) (p -> ny / k) / (double) (p -> ny / k + 1))   // FIXME check whether integer division is correct
            s = (double) (p -> ny / k) / (double) (p -> ny / k + 1);

    double progress = m;    // initial reseed budget, kept for the progress fraction

    // sizes [1..k] hold the cluster sizes; sizes [0] is reused for the
    // min/max ratio.  seeds [0..k-1] hold the row indices already drawn.
    autoNUMvector <double> sizes (0L, k);
    autoNUMvector <long> seeds (0L, k);

    autoPatternList centroids = PatternList_create (k, p -> nx);
    autoNUMvector <double> beta (0L, centroids -> nx);   // copy of a centroid before its update

    do
    {
        double delta;
        long nfriends  = 0;   // number of seeds drawn so far in this attempt

        // The expression equals m / progress, i.e. the fraction of the reseed
        // budget still left.  NOTE(review): this starts at 1 and decreases, and
        // is 0/0 when m is 0 on entry - confirm that this is intended.
        Melder_progress (1 - (progress - m) / progress, U"");

        // Step 1: pick one not-yet-used random row of p for every centroid.
        for (long y = 1; y <= centroids->ny; y++)
        {
            int ifriend = 1;
            long ys = (long) lround(NUMrandomUniform(1, p->ny));

            if (nfriends)
            {
                // Redraw until the index differs from every earlier seed.
                // (The draw above is discarded in this case; it is kept so
                // that the sequence of random numbers stays unchanged.)
                while (ifriend)
                {
                    ys = (long) lround(NUMrandomUniform(1, p->ny));
                    for (long fc = 0; fc < nfriends; fc++)
                    {
                        ifriend = 0;
                        Melder_assert (fc < k);
                        if (seeds [fc] == ys)
                        {
                            ifriend = 1;
                            break;
                        }
                    }
                }
            }
            Melder_assert (nfriends <= k);
            seeds [nfriends++] = ys;

            for (long x = 1; x <= centroids->nx; x++)
                centroids->z[y][x] = p->z[ys][x];
        }

        // Step 2: alternate between labelling the rows and moving the centroids.
        do
        {
            delta = 0;
            KNN_learn (knn.get(), centroids.get(), categories.get(), kOla_REPLACE, kOla_SEQUENTIAL);
            autoCategories interim = KNN_classifyToCategories (knn.get(), p, fws, 1, kOla_FLAT_VOTING);

            for (long x = 1; x <= k; x ++)
                sizes [x] = 0;

            for (long yp = 1; yp <= categories->size; yp ++)
            {
                double alfa = 1;   // 1 + number of rows merged into this centroid so far
                Melder_assert (yp <= centroids -> ny);

                // Remember the old position to measure the movement afterwards.
                for (long x = 1; x <= centroids -> nx; x ++)
                {
                    beta [x] = centroids -> z [yp] [x];
                }

                for (long ys = 1; ys <= interim->size; ys ++)
                {
                    if (FeatureWeights_areFriends (categories->at [yp], interim->at [ys]))
                    {
                        for (long x = 1; x <= p -> nx; x ++)
                        {
                            Melder_assert (ys <= p -> ny);
                            if (alfa == 1)
                            {
                                // First member: the centroid starts at this row.
                                centroids -> z [yp] [x] = p -> z [ys] [x];
                            }
                            else
                            {
                                // Incremental mean over the members seen so far.
                                centroids -> z [yp] [x] += (p -> z [ys] [x] - centroids -> z [yp] [x]) / alfa;
                            }
                        }
                        Melder_assert (yp <= k);
                        sizes [yp] ++;
                        alfa ++;
                    }
                }

                // Accumulate the total absolute displacement of all centroids.
                for (long x = 1; x <= centroids -> nx; x ++)
                {
                    delta += fabs (beta [x] - centroids -> z [yp] [x]);
                }
            }
        }
        while (delta != 0.0);

        // Step 3: balance check on the cluster sizes of this attempt.
        double smax = sizes [1];
        double smin = sizes [1];

        for (long x = 1; x <= k; x++)
        {
            if (smax < sizes [x]) smax = sizes [x];
            if (smin > sizes [x]) smin = sizes [x];
        }

        sizes [0] = smin / smax;
        -- m;
    }
    while (sizes[0] < s && m > 0);

    // Label every row of p with the classifier from the last attempt.
    autoCategories output = KNN_classifyToCategories (knn.get(), p, fws, 1, kOla_FLAT_VOTING);

    return output;
}
// Partitions the rows of p into k clusters and returns one category per row.
//
// Procedure, as implemented below:
//   1. k distinct rows of p are drawn at random and copied into `centroids`.
//   2. `knn` is trained (KNN_learn) on the centroids together with the
//      sequential `categories`, and every row of p is classified with a single
//      neighbour.  Each centroid is then replaced by the running mean of the
//      rows that received its category.  Step 2 repeats until no centroid
//      coordinate changes (delta == 0).
//   3. The ratio smallest/largest cluster size is compared with s.  While it
//      is below s and reseeds remain (m > 0), everything restarts from step 1.
//
// Special case: if k equals the number of rows of p, the sequential categories
// are returned immediately.
//
// Precondition (asserted): in all other cases 1 <= k < p->ny.  Without it the
// code would divide by zero (k == 0) or loop forever looking for more distinct
// seed rows than p contains (k > p->ny).
//
// Returns NULL when an error is pending at the cleanup label or when the
// classification fails; all temporaries are released there.
Categories Pattern_to_Categories_cluster
(
    ///////////////////////////////
    // Parameters                //
    ///////////////////////////////

    Pattern p,              // source
                            //
    FeatureWeights fws,     // feature weights
                            //
    long k,                 // k(!): requested number of clusters
                            //
    double s,               // clustersize constraint 0 < s <= 1
                            //
    long m                  // reseed maximum
                            //
)

{
    Categories categories = NULL, output = NULL;
    KNN knn = NULL;
    Pattern centroids = NULL;
    double *sizes = NULL, *beta = NULL;
    long *seeds = NULL;

    categories = Categories_sequentialNumbers (k); cherror
    if (k == p->ny)
        return categories;

    // Reject cluster counts the algorithm below cannot handle (see header).
    Melder_assert (k >= 1 && k < p->ny);

    knn = KNN_create(); cherror

    // When the rows cannot be split evenly, the most balanced split has
    // clusters of floor(ny/k) and floor(ny/k)+1 rows, so no min/max size ratio
    // above floor(ny/k) / (floor(ny/k)+1) is reachable; cap s at that value.
    if(p->ny % k)
        if (s > (double) (p->ny / k) / (double) (p->ny / k + 1))
            s = (double) (p->ny / k) / (double) (p->ny / k + 1);

    double progress = m;    // initial reseed budget, kept for the progress fraction

    // sizes [1..k] hold the cluster sizes; sizes [0] is reused for the
    // min/max ratio.  seeds [0..k-1] hold the row indices already drawn.
    sizes = NUMdvector (0, k); cherror
    seeds = NUMlvector (0, k); cherror

    // Checked like the allocations above; both results are dereferenced below.
    centroids = Pattern_create (k, p->nx); cherror
    beta = NUMdvector (0, centroids->nx); cherror

    do
    {
        double delta;
        long nfriends  = 0;   // number of seeds drawn so far in this attempt

        // The expression equals m / progress, i.e. the fraction of the reseed
        // budget still left.  A zero return value ends the reseed loop early.
        // NOTE(review): if that happens on the first pass, knn has not been
        // trained when the final classification runs - confirm this is safe.
        if (!Melder_progress1(1 - (progress - m) / progress, L"")) break;

        // Step 1: pick one not-yet-used random row of p for every centroid.
        for (long y = 1; y <= centroids->ny; y++)
        {
            int ifriend = 1;
            long ys = (long) lround(NUMrandomUniform(1, p->ny));

            if (nfriends)
            {
                // Redraw until the index differs from every earlier seed.
                // (The draw above is discarded in this case; it is kept so
                // that the sequence of random numbers stays unchanged.)
                while (ifriend)
                {
                    ys = (long) lround(NUMrandomUniform(1, p->ny));
                    for (long fc = 0; fc < nfriends; fc++)
                    {
                        ifriend = 0;
                        Melder_assert (fc < k);
                        if (seeds [fc] == ys)
                        {
                            ifriend = 1;
                            break;
                        }
                    }
                }
            }
            Melder_assert (nfriends <= k);
            seeds [nfriends++] = ys;

            for (long x = 1; x <= centroids->nx; x++)
                centroids->z[y][x] = p->z[ys][x];
        }

        // Step 2: alternate between labelling the rows and moving the centroids.
        do
        {
            delta = 0;
            KNN_learn (knn, centroids, categories, kOla_REPLACE, kOla_SEQUENTIAL);
            Categories interim = KNN_classifyToCategories (knn, p, fws, 1, kOla_FLAT_VOTING);

            // interim is dereferenced below; bail out to the cleanup code if
            // the classification produced nothing.
            if (interim == NULL) goto end;

            for (long x = 1; x <= k; x++)
                sizes [x] = 0;

            for (long yp = 1; yp <= categories->size; yp++)
            {
                double alfa = 1;   // 1 + number of rows merged into this centroid so far
                Melder_assert (yp <= centroids->ny);

                // Remember the old position to measure the movement afterwards.
                for (long x = 1; x <= centroids->nx; x++)
                {
                    beta[x] = centroids->z[yp][x];
                }

                for (long ys = 1; ys <= interim->size; ys++)
                {
                    if (FRIENDS(categories->item[yp], interim->item[ys]))
                    {
                        for (long x = 1; x <= p->nx; x++)
                        {
                            Melder_assert (ys <= p->ny);
                            if (alfa == 1)
                            {
                                // First member: the centroid starts at this row.
                                centroids->z[yp][x] = p->z[ys][x];
                            }
                            else
                            {
                                // Incremental mean over the members seen so far.
                                centroids->z[yp][x] += (p->z[ys][x] - centroids->z[yp][x]) / alfa;
                            }
                        }
                        Melder_assert (yp <= k);
                        sizes [yp] ++;
                        alfa++;
                    }
                }

                // Accumulate the total absolute displacement of all centroids.
                for (long x = 1; x <= centroids->nx; x++)
                {
                    delta += fabs (beta[x] - centroids->z[yp][x]);
                }
            }
            forget (interim);
        }
        while (delta);

        // Step 3: balance check on the cluster sizes of this attempt.
        double smax = sizes [1];
        double smin = sizes [1];

        for (long x = 1; x <= k; x++)
        {
            if (smax < sizes [x]) smax = sizes [x];
            if (smin > sizes [x]) smin = sizes [x];
        }

        sizes [0] = smin / smax;
        --m;
    }
    while (sizes[0] < s && m > 0);

    Melder_progress1(1.0, NULL);

    // Label every row of p with the classifier from the last attempt.
    output = KNN_classifyToCategories (knn, p, fws, 1, kOla_FLAT_VOTING); cherror

end:
    // Shared cleanup for the success and error paths; the pointers that were
    // never assigned are still NULL here.
    forget (centroids);
    forget (categories);
    forget (knn);
    NUMdvector_free (sizes, 0);
    NUMdvector_free (beta, 0);
    NUMlvector_free (seeds, 0);
    iferror return NULL;
    return output;
}