int KNN_prune_noisy ( /////////////////////////////// // Parameters // /////////////////////////////// Pattern p, // source // Categories c, // source // long y, // source instance index // long k // k(!) // ) { if (y > p->ny) y = p->ny; // safety belt if (k > p->ny) k = p->ny; FeatureWeights fws = FeatureWeights_create(p->nx); if (fws) { long *indices = NUMlvector (0, p->ny - 1); // the coverage is not bounded by k but by n // long reachability = KNN_kNeighboursSkip(p, p, fws, y, k, indices, 0); .OS.081011 long reachability = KNN_kNeighboursSkip(p, p, fws, y, k, indices, y); long coverage = KNN_prune_kCoverage(p, c, y, k, indices); NUMlvector_free (indices, 0); forget(fws); if (!KNN_prune_superfluous(p, c, y, k, 0) && reachability > coverage) return(1); } return(0); }
long KNN_prune_prune ( KNN me, // the classifier to be pruned double n, // pruning degree: noise, 0 <= n <= 1 double r, // pruning redundancy: noise, 0 <= n <= 1 long k // k(!) ) { autoCategories uniqueCategories = Categories_selectUniqueItems (my output.get()); if (Categories_getSize (uniqueCategories.get()) == my nInstances) return 0; long removals = 0; long ncandidates = 0; autoNUMvector <long> candidates (0L, my nInstances - 1); if (my nInstances <= 1) return 0; for (long y = 1; y <= my nInstances; y ++) { if (KNN_prune_noisy (my input.get(), my output.get(), y, k)) { if (n == 1 || NUMrandomUniform (0, 1) <= n) { KNN_removeInstance (me, y); ++ removals; } } } for (long y = 1; y <= my nInstances; ++ y) { if (KNN_prune_superfluous (my input.get(), my output.get(), y, k, 0) && ! KNN_prune_critical (my input.get(), my output.get(), y, k)) candidates [ncandidates ++] = y; } KNN_prune_sort (my input.get(), my output.get(), k, candidates.peek(), ncandidates); for (long y = 0; y < ncandidates; ++ y) { if (KNN_prune_superfluous (my input.get(), my output.get(), candidates [y], k, 0) && ! KNN_prune_critical (my input.get(), my output.get(), candidates [y], k)) { if (r == 1.0 || NUMrandomUniform (0.0, 1.0) <= r) { KNN_removeInstance (me, candidates[y]); for (long i = y + 1; i < ncandidates; ++ i) { if(candidates[i] > candidates[y]) -- candidates[i]; } ++ removals; } } } return removals; }
int KNN_prune_critical ( PatternList p, // source Categories c, // source long y, // source instance index long k // k(!) ) { if (y > p -> ny) y = p -> ny; // safety belt if (k > p -> ny) k = p -> ny; autoFeatureWeights fws = FeatureWeights_create (p -> nx); autoNUMvector <long> indices (0L, k - 1); long ncollected = KNN_kNeighboursSkip (p, p, fws.get(), y, k, indices.peek(), y); for (long ic = 0; ic < ncollected; ic ++) { if (! KNN_prune_superfluous (p, c, indices [ic], k, 0) || ! KNN_prune_superfluous (p, c, indices [ic], k, y)) { return 1; } } return 0; }
int KNN_prune_critical ( /////////////////////////////// // Parameters // /////////////////////////////// Pattern p, // source // Categories c, // source // long y, // source instance index // long k // k(!) // ) { if (y > p->ny) y = p->ny; // safety belt if (k > p->ny) k = p->ny; FeatureWeights fws = FeatureWeights_create(p->nx); if (fws) { long *indices = NUMlvector (0, k - 1); // long ncollected = KNN_kNeighboursSkip(p, p, fws, y, k, indices, 0); .OS.081011 long ncollected = KNN_kNeighboursSkip(p, p, fws, y, k, indices, y); for (long ic = 0; ic < ncollected; ic++) if (!KNN_prune_superfluous(p, c, indices[ic], k, 0) || !KNN_prune_superfluous(p, c, indices[ic], k, y)) { NUMlvector_free (indices, 0); forget(fws); return(1); } NUMlvector_free (indices, 0); } return(0); }
int KNN_prune_noisy ( PatternList p, // source Categories c, // source long y, // source instance index long k // k(!) ) { if (y > p -> ny) y = p -> ny; // safety belt if (k > p -> ny) k = p -> ny; autoFeatureWeights fws = FeatureWeights_create (p -> nx); autoNUMvector <long> indices (0L, p->ny - 1); // the coverage is not bounded by k but by n long reachability = KNN_kNeighboursSkip (p, p, fws.get(), y, k, indices.peek(), y); long coverage = KNN_prune_kCoverage (p, c, y, k, indices.peek()); if (! KNN_prune_superfluous (p, c, y, k, 0) && reachability > coverage) return 1; return 0; }
long KNN_prune_prune ( /////////////////////////////// // Parameters // /////////////////////////////// KNN me, // the classifier to be pruned // double n, // pruning degree: noise, 0 <= n <= 1 // double r, // pruning redundancy: noise, 0 <= n <= 1 // long k // k(!) // ) { Categories uniqueCategories = Categories_selectUniqueItems(my output, 1); if(Categories_getSize(uniqueCategories) == my nInstances) return(0); long removals = 0; long ncandidates = 0; long *candidates = NUMlvector (0, my nInstances - 1); double progress = 1 / (double) my nInstances; if(my nInstances <= 1) return(0); for (long y = 1; y <= my nInstances; y++) { if (!Melder_progress1(1 - (double) y * progress, L"Pruning noisy instances")) return(removals); if (KNN_prune_noisy(my input, my output, y, k)) { if (n == 1 || NUMrandomUniform(0, 1) <= n) { KNN_removeInstance(me, y); ++removals; } } } Melder_progress1(1.0, NULL); for (long y = 1; y <= my nInstances; ++y) { if (!Melder_progress1(1 - (double) y * progress, L"Identifying superfluous and critical instances")) return(removals); if (KNN_prune_superfluous(my input, my output, y, k, 0) && !KNN_prune_critical(my input, my output, y, k)) candidates[ncandidates++] = y; } Melder_progress1(1.0, NULL); KNN_prune_sort(my input, my output, k, candidates, ncandidates); progress = 1 / ncandidates; for (long y = 0; y < ncandidates; ++y) { if (!Melder_progress1(1 - (double) y * progress, L"Pruning superfluous non-critical instances")) return(removals); if (KNN_prune_superfluous(my input, my output, candidates[y], k, 0) && !KNN_prune_critical(my input, my output, candidates[y], k)) { if (r == 1 || NUMrandomUniform(0, 1) <= r) { KNN_removeInstance(me, candidates[y]); for(long i = y + 1; i < ncandidates; ++i) if(candidates[i] > candidates[y]) --candidates[i]; ++removals; } } } Melder_progress1(1.0, NULL); NUMlvector_free (candidates, 0); return(removals); }