double FeatureWeights_evaluate // Obsolete - use *_EvaluateWithTestSet // instead ( /////////////////////////////// // Parameters // /////////////////////////////// FeatureWeights fws, // Weights to evaluate // KNN nn, // Classifier // PatternList pp, // test pattern // Categories c, // test categories // long k, // k(!) // int d // distance weighting // ) { try { autoCategories o = KNN_classifyToCategories (nn, pp, fws, k, d); double hits = 0.0; for (long y = 1; y <= o->size; y ++) if (FeatureWeights_areFriends (o->at [y], c->at [y])) hits ++; hits /= o->size; return hits; } catch (MelderError) { throw; } }
/*
	Partitions the rows of `p` into `k` clusters and returns one category per row.

	Procedure, as implemented below:
	1. Pick `k` distinct random rows of `p` as the initial centroids.
	2. Train `knn` on the centroids (labelled with the categories made by
	   Categories_createWithSequentialNumbers (k)), classify every row of `p` with
	   one neighbour and flat voting, and replace each centroid by the running mean
	   of the rows assigned to it. Repeat until no centroid coordinate changes.
	3. If (smallest cluster size) / (largest cluster size) is below `s`, start again
	   at step 1 with fresh seeds; the outer loop runs at least once and stops when
	   the decremented `m` is no longer positive.
	The result is the classification of `p` by `knn` as it stands after the last pass.

	Special case: when `k` equals the number of rows, the sequentially numbered
	categories are returned directly.

	Precondition: 1 <= k <= p -> ny. With k > ny the seed selection could never find
	k distinct rows (endless loop), and k == 0 would divide by zero; both are now
	caught by an assertion. NOTE(review): callers that take `k` from user input should
	validate it and report a proper error before calling.
*/
autoCategories PatternList_to_Categories_cluster
(
	PatternList p,        // source
	FeatureWeights fws,   // feature weights
	long k,               // k(!)
	double s,             // clustersize constraint 0 < s <= 1
	long m                // reseed maximum
)
{
	autoCategories categories = Categories_createWithSequentialNumbers (k);
	if (k == p -> ny)
		return categories;

	// Everything below needs k distinct seed rows and divides by k.
	Melder_assert (k >= 1 && k < p -> ny);

	autoKNN knn = KNN_create ();

	if (p -> ny % k) {
		// Integer division is intended: cluster sizes are whole numbers, so when ny is
		// not a multiple of k the most balanced split has sizes floor(ny/k) and
		// floor(ny/k)+1. A stricter `s` could never be satisfied, so clamp it.
		const long smallestBalancedSize = p -> ny / k;
		const double bestRatio = (double) smallestBalancedSize / (double) (smallestBalancedSize + 1);
		if (s > bestRatio)
			s = bestRatio;
	}

	double progress = m;
	autoNUMvector <double> sizes (0L, k);   // [1..k]: cluster sizes; [0]: smin/smax ratio of the last pass
	autoNUMvector <long> seeds (0L, k);     // [0..nfriends-1]: row numbers already used as seeds

	autoPatternList centroids = PatternList_create (k, p -> nx);
	autoNUMvector <double> beta (0L, centroids -> nx);   // previous coordinates of the centroid being updated

	do {
		double delta;
		long nfriends = 0;

		// NOTE(review): this value starts at 1 and decreases as `m` is decremented,
		// and is NaN when the initial `m` is 0 - confirm that this is what
		// Melder_progress expects. Left unchanged.
		Melder_progress (1 - (progress - m) / progress, U"");

		/*
			Seed every centroid with a distinct random row of `p`.
		*/
		for (long y = 1; y <= centroids -> ny; y ++) {
			// This first draw is only kept for the very first centroid; for later
			// centroids it is immediately redrawn below. It is retained so that the
			// sequence of random numbers consumed stays the same.
			long ys = (long) lround (NUMrandomUniform (1, p -> ny));
			if (nfriends) {
				bool alreadyUsed = true;
				while (alreadyUsed) {
					ys = (long) lround (NUMrandomUniform (1, p -> ny));
					alreadyUsed = false;
					for (long fc = 0; fc < nfriends; fc ++) {
						Melder_assert (fc < k);
						if (seeds [fc] == ys) {
							alreadyUsed = true;
							break;
						}
					}
				}
			}
			Melder_assert (nfriends <= k);
			seeds [nfriends ++] = ys;
			for (long x = 1; x <= centroids -> nx; x ++)
				centroids -> z [y] [x] = p -> z [ys] [x];
		}

		/*
			Reassign rows and move centroids until nothing moves any more.
		*/
		do {
			delta = 0;
			KNN_learn (knn.get(), centroids.get(), categories.get(), kOla_REPLACE, kOla_SEQUENTIAL);
			autoCategories interim = KNN_classifyToCategories (knn.get(), p, fws, 1, kOla_FLAT_VOTING);

			for (long x = 1; x <= k; x ++)
				sizes [x] = 0;

			for (long yp = 1; yp <= categories -> size; yp ++) {
				double alfa = 1;   // 1 + number of rows merged into this centroid so far
				Melder_assert (yp <= centroids -> ny);

				// Remember where the centroid was, to measure how far it moves.
				for (long x = 1; x <= centroids -> nx; x ++)
					beta [x] = centroids -> z [yp] [x];

				for (long ys = 1; ys <= interim -> size; ys ++) {
					if (FeatureWeights_areFriends (categories -> at [yp], interim -> at [ys])) {
						for (long x = 1; x <= p -> nx; x ++) {
							Melder_assert (ys <= p -> ny);
							if (alfa == 1) {
								// First member: the centroid becomes that row.
								centroids -> z [yp] [x] = p -> z [ys] [x];
							} else {
								// Later members: incremental update of the mean.
								centroids -> z [yp] [x] += (p -> z [ys] [x] - centroids -> z [yp] [x]) / alfa;
							}
						}
						Melder_assert (yp <= k);
						sizes [yp] ++;
						alfa ++;
					}
				}

				for (long x = 1; x <= centroids -> nx; x ++)
					delta += fabs (beta [x] - centroids -> z [yp] [x]);
			}
		} while (delta != 0.0);

		/*
			Balance check: ratio of the smallest to the largest cluster.
		*/
		double smax = sizes [1];
		double smin = sizes [1];
		for (long x = 1; x <= k; x ++) {
			if (smax < sizes [x]) smax = sizes [x];
			if (smin > sizes [x]) smin = sizes [x];
		}
		sizes [0] = smin / smax;
		-- m;
	} while (sizes [0] < s && m > 0);

	autoCategories output = KNN_classifyToCategories (knn.get(), p, fws, 1, kOla_FLAT_VOTING);
	return output;
}
Categories Pattern_to_Categories_cluster ( /////////////////////////////// // Parameters // /////////////////////////////// Pattern p, // source // FeatureWeights fws, // feature weights // long k, // k(!) // double s, // clustersize constraint 0 < s <= 1 // long m // reseed maximum // ) { Categories categories = NULL, output = NULL; KNN knn = NULL; Pattern centroids = NULL; double *sizes = NULL, *beta = NULL; long *seeds = NULL; categories = Categories_sequentialNumbers (k); cherror if (k == p->ny) return categories; knn = KNN_create(); cherror if(p->ny % k) if (s > (double) (p->ny / k) / (double) (p->ny / k + 1)) s = (double) (p->ny / k) / (double) (p->ny / k + 1); double progress = m; sizes = NUMdvector (0, k); cherror seeds = NUMlvector (0, k); cherror centroids = Pattern_create (k, p->nx); beta = NUMdvector (0, centroids->nx); do { double delta; long nfriends = 0; if (!Melder_progress1(1 - (progress - m) / progress, L"")) break; for (long y = 1; y <= centroids->ny; y++) { int friend = 1; long ys = (long) lround(NUMrandomUniform(1, p->ny)); if (nfriends) { while (friend) { ys = (long) lround(NUMrandomUniform(1, p->ny)); for (long fc = 0; fc < nfriends; fc++) { friend = 0; Melder_assert (fc < k); if (seeds [fc] == ys) { friend = 1; break; } } } } Melder_assert (nfriends <= k); seeds [nfriends++] = ys; for (long x = 1; x <= centroids->nx; x++) centroids->z[y][x] = p->z[ys][x]; } do { delta = 0; KNN_learn (knn, centroids, categories, kOla_REPLACE, kOla_SEQUENTIAL); Categories interim = KNN_classifyToCategories (knn, p, fws, 1, kOla_FLAT_VOTING); for (long x = 1; x <= k; x++) sizes [x] = 0; for (long yp = 1; yp <= categories->size; yp++) { double alfa = 1; Melder_assert (yp <= centroids->ny); for (long x = 1; x <= centroids->nx; x++) { beta[x] = centroids->z[yp][x]; } for (long ys = 1; ys <= interim->size; ys++) { if (FRIENDS(categories->item[yp], interim->item[ys])) { for (long x = 1; x <= p->nx; x++) { Melder_assert (ys <= p->ny); if (alfa == 1) { 
centroids->z[yp][x] = p->z[ys][x]; } else { centroids->z[yp][x] += (p->z[ys][x] - centroids->z[yp][x]) / alfa; } } Melder_assert (yp <= k); sizes [yp] ++; alfa++; } } for (long x = 1; x <= centroids->nx; x++) { delta += fabs (beta[x] - centroids->z[yp][x]); } } forget (interim); } while (delta); double smax = sizes [1]; double smin = sizes [1]; for (long x = 1; x <= k; x++) { if (smax < sizes [x]) smax = sizes [x]; if (smin > sizes [x]) smin = sizes [x]; } sizes [0] = smin / smax; --m; } while (sizes[0] < s && m > 0); Melder_progress1(1.0, NULL); output = KNN_classifyToCategories (knn, p, fws, 1, kOla_FLAT_VOTING); cherror end: forget (centroids); forget (categories); forget (knn); NUMdvector_free (sizes, 0); NUMdvector_free (beta, 0); NUMlvector_free (seeds, 0); iferror return NULL; return output; }