long KNN_prune_prune ( KNN me, // the classifier to be pruned double n, // pruning degree: noise, 0 <= n <= 1 double r, // pruning redundancy: noise, 0 <= n <= 1 long k // k(!) ) { autoCategories uniqueCategories = Categories_selectUniqueItems (my output.get()); if (Categories_getSize (uniqueCategories.get()) == my nInstances) return 0; long removals = 0; long ncandidates = 0; autoNUMvector <long> candidates (0L, my nInstances - 1); if (my nInstances <= 1) return 0; for (long y = 1; y <= my nInstances; y ++) { if (KNN_prune_noisy (my input.get(), my output.get(), y, k)) { if (n == 1 || NUMrandomUniform (0, 1) <= n) { KNN_removeInstance (me, y); ++ removals; } } } for (long y = 1; y <= my nInstances; ++ y) { if (KNN_prune_superfluous (my input.get(), my output.get(), y, k, 0) && ! KNN_prune_critical (my input.get(), my output.get(), y, k)) candidates [ncandidates ++] = y; } KNN_prune_sort (my input.get(), my output.get(), k, candidates.peek(), ncandidates); for (long y = 0; y < ncandidates; ++ y) { if (KNN_prune_superfluous (my input.get(), my output.get(), candidates [y], k, 0) && ! KNN_prune_critical (my input.get(), my output.get(), candidates [y], k)) { if (r == 1.0 || NUMrandomUniform (0.0, 1.0) <= r) { KNN_removeInstance (me, candidates[y]); for (long i = y + 1; i < ncandidates; ++ i) { if(candidates[i] > candidates[y]) -- candidates[i]; } ++ removals; } } } return removals; }
long KNN_prune_prune ( /////////////////////////////// // Parameters // /////////////////////////////// KNN me, // the classifier to be pruned // double n, // pruning degree: noise, 0 <= n <= 1 // double r, // pruning redundancy: noise, 0 <= n <= 1 // long k // k(!) // ) { Categories uniqueCategories = Categories_selectUniqueItems(my output, 1); if(Categories_getSize(uniqueCategories) == my nInstances) return(0); long removals = 0; long ncandidates = 0; long *candidates = NUMlvector (0, my nInstances - 1); double progress = 1 / (double) my nInstances; if(my nInstances <= 1) return(0); for (long y = 1; y <= my nInstances; y++) { if (!Melder_progress1(1 - (double) y * progress, L"Pruning noisy instances")) return(removals); if (KNN_prune_noisy(my input, my output, y, k)) { if (n == 1 || NUMrandomUniform(0, 1) <= n) { KNN_removeInstance(me, y); ++removals; } } } Melder_progress1(1.0, NULL); for (long y = 1; y <= my nInstances; ++y) { if (!Melder_progress1(1 - (double) y * progress, L"Identifying superfluous and critical instances")) return(removals); if (KNN_prune_superfluous(my input, my output, y, k, 0) && !KNN_prune_critical(my input, my output, y, k)) candidates[ncandidates++] = y; } Melder_progress1(1.0, NULL); KNN_prune_sort(my input, my output, k, candidates, ncandidates); progress = 1 / ncandidates; for (long y = 0; y < ncandidates; ++y) { if (!Melder_progress1(1 - (double) y * progress, L"Pruning superfluous non-critical instances")) return(removals); if (KNN_prune_superfluous(my input, my output, candidates[y], k, 0) && !KNN_prune_critical(my input, my output, candidates[y], k)) { if (r == 1 || NUMrandomUniform(0, 1) <= r) { KNN_removeInstance(me, candidates[y]); for(long i = y + 1; i < ncandidates; ++i) if(candidates[i] > candidates[y]) --candidates[i]; ++removals; } } } Melder_progress1(1.0, NULL); NUMlvector_free (candidates, 0); return(removals); }