int KNN_prune_noisy
(
    ///////////////////////////////
    // Parameters                //
    ///////////////////////////////

    Pattern p,      // source                   //
    Categories c,   // source                   //
    long y,         // source instance index    //
    long k          // k(!)                     //
)
{
    if (y > p->ny) y = p->ny;   // safety belt
    if (k > p->ny) k = p->ny;

    FeatureWeights fws = FeatureWeights_create(p->nx);
    if (fws)
    {
        long *indices = NUMlvector (0, p->ny - 1);   // the coverage is not bounded by k but by n //
        // long reachability = KNN_kNeighboursSkip(p, p, fws, y, k, indices, 0); .OS.081011
        long reachability = KNN_kNeighboursSkip(p, p, fws, y, k, indices, y);
        long coverage = KNN_prune_kCoverage(p, c, y, k, indices);

        NUMlvector_free (indices, 0);
        forget(fws);

        if (!KNN_prune_superfluous(p, c, y, k, 0) && reachability > coverage)
            return(1);
    }
    return(0);
}
long KNN_prune_kCoverage
(
    PatternList p,      // source
    Categories c,       // source
    long y,             // source instance index
    long k,             // k(!)
    long * indices      // Out: kCoverage set
)
{
    Melder_assert (y <= p->ny);
    Melder_assert (k > 0 && k <= p->ny);

    long cc = 0;
    autoFeatureWeights fws = FeatureWeights_create (p -> nx);
    autoNUMvector <long> tempindices (0L, p -> ny - 1);

    for (long yy = 1; yy <= p -> ny; yy ++) {
        if (y != yy && FeatureWeights_areFriends (c->at [y], c->at [yy])) {
            long n = KNN_kNeighboursSkip (p, p, fws.get(), yy, k, tempindices.peek(), y);
            while (n) {
                Melder_assert (n <= p -> ny);
                if (tempindices [-- n] == y) {
                    indices [cc ++] = yy;
                    break;
                }
            }
        }
    }
    return cc;
}
int KNN_prune_superfluous
(
    ///////////////////////////////
    // Parameters                //
    ///////////////////////////////

    Pattern p,      // source                        //
    Categories c,   // source                        //
    long y,         // source instance index         //
    long k,         // k(!)                          //
    long skipper    // Skipping instance skipper     //
)
{
    if (y > p->ny) y = p->ny;   // safety belt
    if (k > p->ny) k = p->ny;

    FeatureWeights fws = FeatureWeights_create(p->nx);
    if (fws)
    {
        long *indices = NUMlvector (0, k - 1);
        long *freqindices = NUMlvector (0, k - 1);
        double *distances = NUMdvector (0, k - 1);
        double *freqs = NUMdvector (0, k - 1);

        // KNN_kNeighboursSkip(p, p, fws, y, k, indices, skipper); .OS.081011 ->
        if (!KNN_kNeighboursSkip(p, p, fws, y, k, indices, skipper))
        {
            // release the local buffers before bailing out
            NUMlvector_free (indices, 0);
            NUMlvector_free (freqindices, 0);
            NUMdvector_free (distances, 0);
            NUMdvector_free (freqs, 0);
            forget(fws);
            return(0);
        }
        // .OS.081011 <-

        long ncategories = KNN_kIndicesToFrequenciesAndDistances(c, k, indices, distances, freqs, freqindices);
        forget(fws);

        int result = FRIENDS(c->item[y], c->item[freqindices[KNN_max(freqs, ncategories)]]);

        NUMlvector_free (indices, 0);
        NUMlvector_free (freqindices, 0);
        NUMdvector_free (distances, 0);
        NUMdvector_free (freqs, 0);

        if (result)
            return 1;
    }
    return 0;
}
long KNN_prune_kCoverage
(
    ///////////////////////////////
    // Parameters                //
    ///////////////////////////////

    Pattern p,          // source                   //
    Categories c,       // source                   //
    long y,             // source instance index    //
    long k,             // k(!)                     //
    long * indices      // Out: kCoverage set       //
)
{
    Melder_assert(y <= p->ny);
    Melder_assert(k > 0 && k <= p->ny);

    long cc = 0;
    FeatureWeights fws = FeatureWeights_create(p->nx);
    if (fws)
    {
        long *tempindices = NUMlvector (0, p->ny - 1);
        for (long yy = 1; yy <= p->ny; yy++)
        {
            if (y != yy && FRIENDS(c->item[y], c->item[yy]))
            {
                // long n = KNN_kNeighboursSkip(p, p, fws, yy, k, tempindices, 0); .OS.081011
                long n = KNN_kNeighboursSkip(p, p, fws, yy, k, tempindices, y);
                while (n)
                {
                    Melder_assert (n <= p->ny);
                    if (tempindices[--n] == y)
                    {
                        indices[cc++] = yy;
                        break;
                    }
                }
            }
        }
        NUMlvector_free (tempindices, 0);
        forget(fws);
    }
    return(cc);
}
int KNN_prune_noisy
(
    PatternList p,  // source
    Categories c,   // source
    long y,         // source instance index
    long k          // k(!)
)
{
    if (y > p -> ny) y = p -> ny;   // safety belt
    if (k > p -> ny) k = p -> ny;

    autoFeatureWeights fws = FeatureWeights_create (p -> nx);
    autoNUMvector <long> indices (0L, p->ny - 1);   // the coverage is not bounded by k but by n
    long reachability = KNN_kNeighboursSkip (p, p, fws.get(), y, k, indices.peek(), y);
    long coverage = KNN_prune_kCoverage (p, c, y, k, indices.peek());

    if (! KNN_prune_superfluous (p, c, y, k, 0) && reachability > coverage)
        return 1;
    return 0;
}
int KNN_prune_critical
(
    PatternList p,  // source
    Categories c,   // source
    long y,         // source instance index
    long k          // k(!)
)
{
    if (y > p -> ny) y = p -> ny;   // safety belt
    if (k > p -> ny) k = p -> ny;

    autoFeatureWeights fws = FeatureWeights_create (p -> nx);
    autoNUMvector <long> indices (0L, k - 1);
    long ncollected = KNN_kNeighboursSkip (p, p, fws.get(), y, k, indices.peek(), y);

    for (long ic = 0; ic < ncollected; ic ++) {
        if (! KNN_prune_superfluous (p, c, indices [ic], k, 0) || ! KNN_prune_superfluous (p, c, indices [ic], k, y)) {
            return 1;
        }
    }
    return 0;
}
int KNN_prune_critical
(
    ///////////////////////////////
    // Parameters                //
    ///////////////////////////////

    Pattern p,      // source                   //
    Categories c,   // source                   //
    long y,         // source instance index    //
    long k          // k(!)                     //
)
{
    if (y > p->ny) y = p->ny;   // safety belt
    if (k > p->ny) k = p->ny;

    FeatureWeights fws = FeatureWeights_create(p->nx);
    if (fws)
    {
        long *indices = NUMlvector (0, k - 1);
        // long ncollected = KNN_kNeighboursSkip(p, p, fws, y, k, indices, 0); .OS.081011
        long ncollected = KNN_kNeighboursSkip(p, p, fws, y, k, indices, y);

        for (long ic = 0; ic < ncollected; ic++)
            if (!KNN_prune_superfluous(p, c, indices[ic], k, 0) || !KNN_prune_superfluous(p, c, indices[ic], k, y))
            {
                NUMlvector_free (indices, 0);
                forget(fws);
                return(1);
            }

        NUMlvector_free (indices, 0);
        forget(fws);
    }
    return(0);
}
int KNN_prune_superfluous
(
    PatternList p,  // source
    Categories c,   // source
    long y,         // source instance index
    long k,         // k(!)
    long skipper    // Skipping instance skipper
)
{
    if (y > p -> ny) y = p -> ny;   // safety belt
    if (k > p -> ny) k = p -> ny;

    autoFeatureWeights fws = FeatureWeights_create (p -> nx);
    autoNUMvector <long> indices (0L, k - 1);
    autoNUMvector <long> freqindices (0L, k - 1);
    autoNUMvector <double> distances (0L, k - 1);
    autoNUMvector <double> freqs (0L, k - 1);

    if (! KNN_kNeighboursSkip (p, p, fws.get(), y, k, indices.peek(), skipper))
        return 0;

    long ncategories = KNN_kIndicesToFrequenciesAndDistances (c, k, indices.peek(), distances.peek(), freqs.peek(), freqindices.peek());
    int result = FeatureWeights_areFriends (c->at [y], c->at [freqindices [KNN_max (freqs.peek(), ncategories)]]);
    if (result)
        return 1;
    return 0;
}
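// Illustrative sketch (not part of the Praat KNN API): KNN_prune_superfluous above asks whether
// instance y would still receive its own class from the most frequent class among its k nearest
// neighbours, optionally ignoring one instance ("skipper"). The helper below shows the same idea
// on a toy row-major matrix with integer class labels and unweighted Euclidean distance; every
// name in it (exampleIsSuperfluous, z, label, ...) is a hypothetical illustration, not Praat code.
#include <vector>

static int exampleIsSuperfluous (const double *z, const int *label, long n, long nx,
    long y, long k, long skipper /* pass -1 to skip nobody */)
{
    std::vector<char> used (n, 0);
    std::vector<long> neighbour;
    for (long picked = 0; picked < k; picked ++) {   // greedy selection of the k nearest instances
        long bestIndex = -1;
        double bestDist = 0.0;
        for (long i = 0; i < n; i ++) {
            if (i == y || i == skipper || used [i]) continue;
            double dist = 0.0;
            for (long x = 0; x < nx; x ++) {   // squared Euclidean distance, uniform feature weights
                double diff = z [i * nx + x] - z [y * nx + x];
                dist += diff * diff;
            }
            if (bestIndex < 0 || dist < bestDist) {
                bestIndex = i;
                bestDist = dist;
            }
        }
        if (bestIndex < 0) break;   // fewer than k candidates available
        used [bestIndex] = 1;
        neighbour.push_back (bestIndex);
    }
    long m = (long) neighbour.size ();
    if (m == 0) return 0;

    long bestCount = 0;
    int bestLabel = label [neighbour [0]];   // most frequent class among the neighbours
    for (long i = 0; i < m; i ++) {
        long count = 0;
        for (long j = 0; j < m; j ++)
            if (label [neighbour [j]] == label [neighbour [i]])
                count ++;
        if (count > bestCount) {
            bestCount = count;
            bestLabel = label [neighbour [i]];
        }
    }
    return bestLabel == label [y] ? 1 : 0;   // superfluous: the neighbours alone already predict y's class
}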
autoFeatureWeights FeatureWeights_computeRELIEF
(
    ///////////////////////////////
    // Parameters                //
    ///////////////////////////////

    PatternList pp,     // source pattern       //
    Categories c,       // source categories    //
    long k              // k(!)                 //
)
{
    autoPatternList p = Data_copy (pp);
    autoFeatureWeights me = FeatureWeights_create (p -> nx);

    /////////////////////////////////
    // Initial weights <- 0        //
    /////////////////////////////////

    for (long i = 1; i <= p->nx; i++) {
        my fweights -> data [1] [i] = 0.0;
    }

    /////////////////////////////////
    // Normalization               //
    /////////////////////////////////

    autoNUMvector <double> min (0L, p -> nx);   // upper bound nx, so that indices 1 .. nx below stay in range
    autoNUMvector <double> max (0L, p -> nx);
    for (long x = 1; x <= p -> nx; x ++) {
        max [x] = p -> z [1] [x];
        min [x] = max [x];
    }

    for (long y = 1; y <= p -> ny; y ++) {
        for (long x = 1; x <= p->nx; x++) {
            if (p->z[y][x] > max[x]) max[x] = p->z[y][x];
            if (p->z[y][x] < min[x]) min[x] = p->z[y][x];
        }
    }

    autoNUMvector <double> alfa (0L, p -> nx);
    for (long x = 1; x <= p -> nx; x ++) {
        alfa [x] = max [x] - min [x];
    }

    for (long y = 1; y <= p->ny; y++) {
        for (long x = 1; x <= p->nx; x++) {
            if (alfa [x] != 0.0) {
                p->z[y][x] = (p->z[y][x] - min[x]) / alfa[x];
            } else {
                p->z[y][x] = 0.0;
            }
        }
    }

    /////////////////////////////////
    // Computing prior class probs //
    /////////////////////////////////

    autoNUMvector <double> priors (0L, c->size - 1);   // worst-case allocations
    autoNUMvector <long> classes (0L, c->size - 1);    //
    autoNUMvector <long> enemies (0L, c->size - 1);    //
    autoNUMvector <long> friends (0L, c->size - 1);    //
    long nclasses = FeatureWeights_computePriors (c, classes.peek(), priors.peek());
    Melder_assert (nclasses >= 2);

    /////////////////////////////////
    // Updating the w.vector       //
    /////////////////////////////////

    for (long y = 1; y <= p -> ny; y ++) {
        long nfriends = KNN_kFriends (p.get(), p.get(), c, y, k, friends.peek());
        long nenemies = KNN_kUniqueEnemies (p.get(), p.get(), c, y, nclasses - 1, enemies.peek());

        if (nfriends && nenemies) {
            autoNUMvector <double> classps (0L, nenemies - 1);
            for (long eq = 0; eq < nenemies; eq ++) {
                for (long iq = 0; iq < nclasses; iq ++) {
                    if (FeatureWeights_areFriends (c->at [enemies [eq]], c->at [classes [iq]])) {
                        classps [eq] = priors [iq];
                        break;
                    }
                }
            }
            for (long x = 1; x <= p -> nx; x ++) {
                double p1 = 0.0;
                double p2 = 0.0;
                for (long ec = 0; ec < nfriends; ec ++) {
                    p1 += fabs (p -> z [y] [x] - p -> z [friends [ec]] [x]) / (p -> ny * nfriends);
                }
                for (long ec = 0; ec < nenemies; ec++) {
                    p2 += (fabs (p->z[y][x] - p->z[enemies[ec]][x]) * classps[ec]) / p->ny;
                }
                my fweights -> data [1] [x] = my fweights -> data [1] [x] - p1 + p2;
            }
        }
    }
    return me;
}
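// Illustrative sketch (not Praat code): after the min-max normalization above, the RELIEF update
// for one instance y adjusts every feature weight by
//     w [x] <- w [x] - sum over friends of |z [y] [x] - z [friend] [x]| / (ny * nFriends)
//                    + sum over enemies of |z [y] [x] - z [enemy] [x]| * P (class of enemy) / ny,
// so features that separate y from its enemies gain weight and features that vary among friends
// lose it. The toy helper below applies one such update on a row-major matrix that is assumed to
// be normalized to [0, 1] already; all names (exampleReliefUpdate, enemyClassPrior, ...) are
// hypothetical.
#include <cmath>

static void exampleReliefUpdate (double *w, const double *z, long n, long nx, long y,
    const long *friends, long nFriends,
    const long *enemies, const double *enemyClassPrior, long nEnemies)
{
    for (long x = 0; x < nx; x ++) {
        double pull = 0.0, push = 0.0;
        for (long i = 0; i < nFriends; i ++)   // similar friends make the feature look irrelevant
            pull += fabs (z [y * nx + x] - z [friends [i] * nx + x]) / (n * nFriends);
        for (long i = 0; i < nEnemies; i ++)   // distant enemies, weighted by their class prior, make it look relevant
            push += fabs (z [y * nx + x] - z [enemies [i] * nx + x]) * enemyClassPrior [i] / n;
        w [x] += push - pull;
    }
}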
autoFeatureWeights FeatureWeights_computeWrapperExt
(
    ///////////////////////////////
    // Parameters                //
    ///////////////////////////////

    KNN nn,             // Classifier               //
    PatternList pp,     // test pattern             //
    Categories c,       // test categories          //
    long k,             // k(!)                     //
    int d,              // distance weighting       //
    long nseeds,        // the number of seeds      //
    double alfa,        // shrinkage factor         //
    double stop,        // stop at                  //
    int mode            // mode (co/serial)         //
)
{
    if (! nn) return autoFeatureWeights();

    try {
        double pivot = 0.5;
        double range = 0.5;
        autoNUMvector <double> results (0L, nseeds);

        autoThingVector <structFeatureWeights> cs (0L, nseeds);
        for (long y = 0; y <= nseeds; y++) {
            cs [y] = FeatureWeights_create (pp -> nx);
        }

        for (long x = 1; x <= pp -> nx; x ++)
            cs [nseeds] -> fweights -> data [1] [x] = pivot;

        results [nseeds] = FeatureWeights_evaluate (cs [nseeds].get(), nn, pp, c, k, d);

        while (range > 0 && results [nseeds] < stop) {
            long best = nseeds;

            if (mode == 2) {
                for (long x = 1; x <= pp->nx; x++) {
                    for (long y = 0; y < nseeds; y++) {
                        cs[y]->fweights->data[1][x] = NUMrandomUniform(OlaMAX(0, (cs[nseeds]->fweights)->data[1][x] - range),
                            OlaMIN(1, cs[nseeds]->fweights->data[1][x] + range));
                        results[y] = FeatureWeights_evaluate (cs[y].get(), nn, pp, c, k, d);
                    }
                    for (long q = 0; q < nseeds; q++)
                        if (results[q] > results[best]) best = q;
                    if (results[best] > results[nseeds]) {
                        for (long x = 1; x <= pp->nx; x++)   // BUG: a loop over x inside a loop over x; just hope mode is never 2
                            cs[nseeds]->fweights->data[1][x] = cs[best]->fweights->data[1][x];
                        results[nseeds] = results[best];
                    }
                }
            } else {
                for (long y = 0; y < nseeds; y++) {
                    for (long x = 1; x <= pp->nx; x++) {
                        cs[y]->fweights->data[1][x] = NUMrandomUniform(OlaMAX(0, cs[nseeds]->fweights->data[1][x] - range),
                            OlaMIN(1, cs[nseeds]->fweights->data[1][x] + range));
                    }
                    results[y] = FeatureWeights_evaluate (cs [y].get(), nn, pp, c, k, d);
                }
                for (long q = 0; q < nseeds; q++)
                    if (results[q] > results[best]) best = q;
                if (results[best] > results[nseeds]) {
                    for (long x = 1; x <= pp->nx; x++)
                        cs[nseeds]->fweights->data[1][x] = cs[best]->fweights->data[1][x];
                    results[nseeds] = results[best];
                }
            }
            range -= alfa;
        }

        autoFeatureWeights result = cs [nseeds].move();
        return result;
    } catch (MelderError) {
        Melder_throw (U"FeatureWeights: wrapper not computed.");
    }
}
autoFeatureWeights FeatureWeights_computeWrapperInt
(
    ///////////////////////////////
    // Parameters                //
    ///////////////////////////////

    KNN me,             // Classifier                       //
    long k,             // k(!)                             //
    int d,              // distance weighting               //
    long nseeds,        // the number of seeds              //
    double alfa,        // shrinkage factor                 //
    double stop,        // stop at                          //
    int mode,           // mode (co/serial)                 //
    int emode           // evaluation mode (10-fold/L1O)    //
)
{
    if (! me) return autoFeatureWeights();

    try {
        double pivot = 0.5;
        double range = 0.5;
        autoNUMvector <double> results (0L, nseeds);

        autoThingVector <structFeatureWeights> cs (0L, nseeds);
        for (long y = 0; y <= nseeds; y++) {
            cs [y] = FeatureWeights_create (my input -> nx);
        }

        for (long x = 1; x <= my input -> nx; x ++)
            cs [nseeds] -> fweights -> data [1] [x] = pivot;

        results [nseeds] = KNN_evaluate (me, cs [nseeds].get(), k, d, emode);

        while (range > 0 && results [nseeds] < stop) {
            long best = nseeds;

            if (mode == 2) {
                for (long x = 1; x <= (my input)->nx; x ++) {
                    for (long y = 0; y < nseeds; y ++) {
                        cs[y]->fweights->data[1][x] = NUMrandomUniform(OlaMAX(0, cs[nseeds]->fweights->data[1][x] - range),
                            OlaMIN(1, cs[nseeds]->fweights->data[1][x] + range));
                        results[y] = KNN_evaluate (me, cs [y].get(), k, d, emode);
                    }
                    for (long q = 0; q < nseeds; q++)
                        if (results[q] > results[best]) best = q;
                    if (results[best] > results[nseeds]) {
                        for (long x = 1; x <= (my input)->nx; x++)   // HELP FIXME the same index for an inner and an outer loop!!!
                            cs[nseeds]->fweights->data[1][x] = cs[best]->fweights->data[1][x];
                        results[nseeds] = results[best];
                    }
                }
            } else {
                for (long y = 0; y < nseeds; y++) {
                    for (long x = 1; x <= (my input)->nx; x++) {
                        cs[y]->fweights->data[1][x] = NUMrandomUniform(OlaMAX(0, cs[nseeds]->fweights->data[1][x] - range),
                            OlaMIN(1, cs[nseeds]->fweights->data[1][x] + range));
                    }
                    results[y] = KNN_evaluate (me, cs [y].get(), k, d, emode);
                }
                for (long q = 0; q < nseeds; q++)
                    if (results[q] > results[best]) best = q;
                if (results[best] > results[nseeds]) {
                    for (long x = 1; x <= (my input)->nx; x++)
                        cs[nseeds]->fweights->data[1][x] = cs[best]->fweights->data[1][x];
                    results[nseeds] = results[best];
                }
            }
            range -= alfa;
        }

        autoFeatureWeights result = cs [nseeds].move();
        return result;
    } catch (MelderError) {
        Melder_throw (U"FeatureWeights: wrapper not computed.");
    }
}
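// Illustrative sketch (not part of the Praat API): both wrapper functions above run the same
// shrinking-range random search, shown here for the branch in which all weights are perturbed
// together. Every weight starts at a pivot of 0.5; on each pass, nseeds candidate weight vectors
// are drawn uniformly within +/- range of the current best vector, each candidate is scored by
// the classifier, the winning candidate replaces the current vector only if it scores higher, and
// range shrinks by alfa until it reaches zero or the score reaches the stop threshold. The
// stand-alone toy below uses an arbitrary scoring callback and rand (); every name in it
// (exampleWrapperSearch, evaluate, ...) is a hypothetical illustration.
#include <cstdlib>
#include <vector>

static std::vector<double> exampleWrapperSearch (long nx, long nseeds, double alfa, double stop,
    double (*evaluate) (const std::vector<double> &))
{
    std::vector<double> best ((size_t) nx, 0.5);   // every weight starts at the pivot 0.5
    double bestScore = evaluate (best);

    for (double range = 0.5; range > 0.0 && bestScore < stop; range -= alfa) {
        std::vector<double> bestCandidate = best;
        double bestCandidateScore = bestScore;
        for (long seed = 0; seed < nseeds; seed ++) {
            std::vector<double> candidate ((size_t) nx);
            for (long x = 0; x < nx; x ++) {   // draw each weight uniformly within +/- range, clipped to [0, 1]
                double lo = best [x] - range > 0.0 ? best [x] - range : 0.0;
                double hi = best [x] + range < 1.0 ? best [x] + range : 1.0;
                candidate [x] = lo + (hi - lo) * rand () / (double) RAND_MAX;
            }
            double score = evaluate (candidate);
            if (score > bestCandidateScore) {   // remember the best seed of this pass
                bestCandidateScore = score;
                bestCandidate = candidate;
            }
        }
        best = bestCandidate;   // adopted only if some seed beat the current best
        bestScore = bestCandidateScore;
    }
    return best;
}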