Polygon Polygon_createFromRandomVertices (long numberOfVertices, double xmin, double xmax, double ymin, double ymax) {
    try {
        autoPolygon me = Polygon_create (numberOfVertices);
        for (long i = 1; i <= numberOfVertices; i++) {
            my x[i] = NUMrandomUniform (xmin, xmax);
            my y[i] = NUMrandomUniform (ymin, ymax);
        }
        return me.transfer();
    } catch (MelderError) {
        Melder_throw ("Polygon not created.");
    }
}
void Configuration_randomize (Configuration me) {
    for (long i = 1; i <= my numberOfRows; i++) {
        for (long j = 1; j <= my numberOfColumns; j++) {
            my data[i][j] = NUMrandomUniform (-1.0, 1.0);
        }
    }
}
void PairDistribution_peekPair (PairDistribution me, char32 **string1, char32 **string2) {
    try {
        *string1 = *string2 = nullptr;
        double total = 0.0;
        long nin = my pairs -> size, iin;
        PairProbability prob;
        if (nin < 1)
            Melder_throw (U"No candidates.");
        for (iin = 1; iin <= nin; iin ++) {
            prob = static_cast <PairProbability> (my pairs -> item [iin]);
            total += prob -> weight;
        }
        do {
            double rand = NUMrandomUniform (0, total), sum = 0.0;
            for (iin = 1; iin <= nin; iin ++) {
                prob = static_cast <PairProbability> (my pairs -> item [iin]);
                sum += prob -> weight;
                if (rand <= sum) break;
            }
        } while (iin > nin);   // guard against rounding errors
        prob = static_cast <PairProbability> (my pairs -> item [iin]);
        if (! prob -> string1 || ! prob -> string2)
            Melder_throw (U"No string in probability pair ", iin, U".");
        *string1 = prob -> string1;
        *string2 = prob -> string2;
    } catch (MelderError) {
        Melder_throw (me, U": pair not peeked.");
    }
}
void PairDistribution_to_Stringses (PairDistribution me, long nout, autoStrings *strings1_out, autoStrings *strings2_out) {
    try {
        long nin = my pairs -> size, iin;
        if (nin < 1)
            Melder_throw (U"No candidates.");
        if (nout < 1)
            Melder_throw (U"Number of generated string pairs should be positive.");
        double total = PairDistributions_getTotalWeight_checkPositive (me);
        autoStrings strings1 = Thing_new (Strings);
        strings1 -> numberOfStrings = nout;
        strings1 -> strings = NUMvector <char32 *> (1, nout);
        autoStrings strings2 = Thing_new (Strings);
        strings2 -> numberOfStrings = nout;
        strings2 -> strings = NUMvector <char32 *> (1, nout);
        for (long iout = 1; iout <= nout; iout ++) {
            do {
                double rand = NUMrandomUniform (0, total), sum = 0.0;
                for (iin = 1; iin <= nin; iin ++) {
                    PairProbability prob = static_cast <PairProbability> (my pairs -> item [iin]);
                    sum += prob -> weight;
                    if (rand <= sum) break;
                }
            } while (iin > nin);   /* Guard against rounding errors. */
            PairProbability prob = static_cast <PairProbability> (my pairs -> item [iin]);
            if (! prob -> string1 || ! prob -> string2)
                Melder_throw (U"No string in probability pair ", iin, U".");
            strings1 -> strings [iout] = Melder_dup (prob -> string1);
            strings2 -> strings [iout] = Melder_dup (prob -> string2);
        }
        *strings1_out = strings1.move();
        *strings2_out = strings2.move();
    } catch (MelderError) {
        Melder_throw (me, U": generation of Stringses not performed.");
    }
}
void Distributions_peek (Distributions me, long column, char32 **string, long *number) {
    Distributions_checkSpecifiedColumnNumberWithinRange (me, column);
    if (my numberOfRows < 1)
        Melder_throw (me, U": I have no candidates.");
    double total = 0.0;
    for (long irow = 1; irow <= my numberOfRows; irow ++) {
        total += my data [irow] [column];
    }
    if (total <= 0.0)
        Melder_throw (me, U": the total weight of column ", column, U" is not positive.");
    long irow;
    do {
        double rand = NUMrandomUniform (0, total), sum = 0.0;
        for (irow = 1; irow <= my numberOfRows; irow ++) {
            sum += my data [irow] [column];
            if (rand <= sum) break;
        }
    } while (irow > my numberOfRows);   // guard against rounding errors
    if (my rowLabels [irow] == NULL)
        Melder_throw (me, U": no string in row ", irow, U".");
    if (string) *string = my rowLabels [irow];
    if (number) *number = irow;
}
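/*
 * The three routines above share one idiom: draw NUMrandomUniform (0, total), walk the cumulative
 * weights, and redo the whole draw if rounding errors let the index run past the end. Below is a
 * minimal standalone sketch of that idiom (not Praat code; it uses <random> instead of
 * NUMrandomUniform and assumes the total weight is strictly positive, as the callers above check).
 */
#include <cstddef>
#include <random>

std::size_t drawWeightedIndex (const double weights [], std::size_t n, std::mt19937 &rng) {
    double total = 0.0;
    for (std::size_t i = 0; i < n; i ++)
        total += weights [i];   // assumed strictly positive
    std::uniform_real_distribution<double> uniform (0.0, total);
    std::size_t i;
    do {
        double rand = uniform (rng), sum = 0.0;
        for (i = 0; i < n; i ++) {
            sum += weights [i];
            if (rand <= sum)
                break;   // pick the first index whose cumulative weight reaches the draw
        }
    } while (i >= n);   // guard against rounding errors, as in the functions above
    return i;
}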
void KNN_prune_sort (
    PatternList p,      // source
    Categories c,       // source
    long k,             // k(!)
    long * indices,     // indices of instances to be sorted
    long nindices       // the number of instances to be sorted
) {
    long n = nindices;
    autoNUMvector <long> h (0L, nindices - 1);
    for (long cc = 0; cc < nindices; ++ cc)
        h [cc] = KNN_friendsAmongkNeighbours (p, p, c, indices [cc], k);
    while (-- n) {   // insertion-sort, is heap-sort worth the effort?
        for (long m = n; m < nindices - 1; m ++) {
            if (h [m - 1] > h [m]) break;
            if (h [m - 1] < h [m]) {
                OlaSWAP (long, indices [m - 1], indices [m]);
            } else {
                if (KNN_nearestEnemy (p, p, c, indices [m - 1]) < KNN_nearestEnemy (p, p, c, indices [m])) {
                    OlaSWAP (long, indices [m - 1], indices [m]);
                } else {
                    if (NUMrandomUniform (0, 1) > 0.5) {
                        OlaSWAP (long, indices [m - 1], indices [m]);
                    }
                }
            }
        }
    }
}
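/*
 * KNN_prune_sort breaks ties (equal friend counts, then equal nearest-enemy comparisons) with a
 * coin flip inside the comparison itself. A random comparator is not a strict weak ordering, so a
 * standalone re-implementation would normally pre-draw one random tie-break key per element and
 * sort on the pair instead. The sketch below is illustrative only (assumed names, std::sort
 * instead of the hand-written insertion sort); it orders positions by descending score.
 */
#include <algorithm>
#include <numeric>
#include <random>
#include <vector>

std::vector<std::size_t> sortByScoreWithRandomTieBreak (const std::vector<double> &score, std::mt19937 &rng) {
    std::uniform_real_distribution<double> uniform (0.0, 1.0);
    std::vector<double> tieKey (score.size ());
    for (double &key : tieKey)
        key = uniform (rng);   // one random key per element, drawn once
    std::vector<std::size_t> order (score.size ());
    std::iota (order.begin (), order.end (), std::size_t (0));
    std::sort (order.begin (), order.end (), [&] (std::size_t a, std::size_t b) {
        if (score [a] != score [b])
            return score [a] > score [b];   // primary key: descending score
        return tieKey [a] > tieKey [b];     // ties resolved by the pre-drawn random key
    });
    return order;
}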
static long stochastic (I, const double activation[]) {
    iam (FFNet);
    long i;
    double number, range = 0, lower = 0;
    for (i = 1; i <= my nOutputs; i++)
        range += activation[i];
    number = NUMrandomUniform (0, 1) * range;
    for (i = 1; i <= my nOutputs; i++)
        if (number < (lower += activation[i])) break;
    return i;
}
long KNN_prune_prune (
    KNN me,     // the classifier to be pruned
    double n,   // pruning degree: noise, 0 <= n <= 1
    double r,   // pruning redundancy: noise, 0 <= n <= 1
    long k      // k(!)
) {
    autoCategories uniqueCategories = Categories_selectUniqueItems (my output.get());
    if (Categories_getSize (uniqueCategories.get()) == my nInstances)
        return 0;
    long removals = 0;
    long ncandidates = 0;
    autoNUMvector <long> candidates (0L, my nInstances - 1);
    if (my nInstances <= 1)
        return 0;
    for (long y = 1; y <= my nInstances; y ++) {
        if (KNN_prune_noisy (my input.get(), my output.get(), y, k)) {
            if (n == 1 || NUMrandomUniform (0, 1) <= n) {
                KNN_removeInstance (me, y);
                ++ removals;
            }
        }
    }
    for (long y = 1; y <= my nInstances; ++ y) {
        if (KNN_prune_superfluous (my input.get(), my output.get(), y, k, 0) && ! KNN_prune_critical (my input.get(), my output.get(), y, k))
            candidates [ncandidates ++] = y;
    }
    KNN_prune_sort (my input.get(), my output.get(), k, candidates.peek(), ncandidates);
    for (long y = 0; y < ncandidates; ++ y) {
        if (KNN_prune_superfluous (my input.get(), my output.get(), candidates [y], k, 0) && ! KNN_prune_critical (my input.get(), my output.get(), candidates [y], k)) {
            if (r == 1.0 || NUMrandomUniform (0.0, 1.0) <= r) {
                KNN_removeInstance (me, candidates [y]);
                for (long i = y + 1; i < ncandidates; ++ i) {
                    if (candidates [i] > candidates [y])
                        -- candidates [i];
                }
                ++ removals;
            }
        }
    }
    return removals;
}
void FFNet_reset (FFNet me, double wrange) {
    long i;
    for (i = 1; i <= my nWeights; i++)
        if (my wSelected[i]) my w[i] = NUMrandomUniform (-wrange, wrange);
    for (i = 1; i <= my nNodes; i++)
        my activity[i] = (my isbias[i] ? 1.0 : 0.0);
    my accumulatedCost = 0.0;
    forget (my minimizer);
}
void FFNet_reset (FFNet me, double wrange) {
    for (long i = 1; i <= my nWeights; i++) {
        if (my wSelected[i]) {
            my w[i] = NUMrandomUniform (-wrange, wrange);
        }
    }
    for (long i = 1; i <= my nNodes; i++) {
        my activity[i] = (my isbias[i] ? 1.0 : 0.0);
    }
    my accumulatedCost = 0.0;
    my minimizer.reset(nullptr);
}
void KNN_prune_sort (
    ///////////////////////////////
    // Parameters                //
    ///////////////////////////////
    Pattern p,          // source          //
    Categories c,       // source          //
    long k,             // k(!)            //
    long * indices,     // indices of instances to be sorted //
    long nindices       // the number of instances to be sorted //
) {
    long n = nindices;
    long *h = NUMlvector (0, nindices - 1);
    for (long cc = 0; cc < nindices; ++cc)
        h[cc] = KNN_friendsAmongkNeighbours(p, p, c, indices[cc], k);
    while (--n)   // insertion-sort, is heap-sort worth the effort?
    {
        for (long m = n; m < nindices - 1; m++) {
            if (h[m - 1] > h[m]) break;
            if (h[m - 1] < h[m]) {
                OlaSWAP(long, indices[m - 1], indices[m]);
            } else {
                if (KNN_nearestEnemy(p, p, c, indices[m - 1]) < KNN_nearestEnemy(p, p, c, indices[m])) {
                    OlaSWAP(long, indices[m - 1], indices[m]);
                } else {
                    if (NUMrandomUniform(0, 1) > 0.5)
                        OlaSWAP(long, indices[m - 1], indices[m]);
                }
            }
        }
    }
    NUMlvector_free (h, 0);
}
autoPointProcess PointProcess_createPoissonProcess (double startingTime, double finishingTime, double density) {
    try {
        long nt = NUMrandomPoisson ((finishingTime - startingTime) * density);
        autoPointProcess me = PointProcess_create (startingTime, finishingTime, nt);
        my nt = nt;
        for (long i = 1; i <= nt; i ++)
            my t [i] = NUMrandomUniform (startingTime, finishingTime);
        NUMsort_d (my nt, my t);
        return me;
    } catch (MelderError) {
        Melder_throw (U"PointProcess (Poisson process) not created.");
    }
}
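/*
 * A minimal standalone sketch (not Praat's implementation) of the same construction used by
 * PointProcess_createPoissonProcess: the number of events in [tmin, tmax] is Poisson-distributed
 * with mean density * (tmax - tmin), and given that count the event times are i.i.d. uniform on
 * the interval; sorting then yields the process. Standard-library generators stand in for
 * NUMrandomPoisson, NUMrandomUniform and NUMsort_d.
 */
#include <algorithm>
#include <random>
#include <vector>

std::vector<double> samplePoissonProcess (double tmin, double tmax, double density, std::mt19937 &rng) {
    std::poisson_distribution<long> numberOfEvents (density * (tmax - tmin));
    std::uniform_real_distribution<double> eventTime (tmin, tmax);
    std::vector<double> t (numberOfEvents (rng));
    for (double &ti : t)
        ti = eventTime (rng);
    std::sort (t.begin (), t.end ());
    return t;
}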
static long stochastic (FFNet me, const double activation[]) {
    long i;
    double range = 0.0, lower = 0.0;
    for (i = 1; i <= my nOutputs; i++) {
        range += activation[i];
    }
    double number = NUMrandomUniform (0.0, range);
    for (i = 1; i <= my nOutputs; i++) {
        lower += activation[i];
        if (number < lower) {
            break;
        }
    }
    return i;
}
void Minimizer_reset (Minimizer me, const double guess[]) {
    if (guess) {
        for (long i = 1; i <= my nParameters; i++) {
            my p[i] = guess[i];
        }
    } else {
        for (long i = 1; i <= my nParameters; i++) {
            my p[i] = NUMrandomUniform (-1.0, 1.0);
        }
    }
    NUMvector_free<double> (my history, 1);
    my history = nullptr;
    my maxNumOfIterations = my success = my funcCalls = my iteration = 0;
    my minimum = 1.0e38;
    my v_reset ();
}
long FFNet_getWinningUnit (FFNet me, int labeling) {
    long i, pos = 1, k = my nNodes - my nOutputs;
    if (labeling == 2) /* stochastic */ {
        double sum = 0, random;
        for (i = 1; i <= my nOutputs; i++)
            sum += my activity[k+i];
        random = NUMrandomUniform (0, sum);
        for (pos = my nOutputs; pos >= 2; pos--)
            if (random > (sum -= my activity[k+pos])) break;
    } else /* winner-takes-all */ {
        double max = my activity[k+1];
        for (i = 2; i <= my nOutputs; i++)
            if (my activity[k+i] > max) { max = my activity[k+i]; pos = i; }
    }
    return pos;
}
/*
 * Generate n different cct's that have a common diagonalizer.
 */
autoCrossCorrelationTables CrossCorrelationTables_createTestSet (long dimension, long n, int firstPositiveDefinite, double sigma) {
    try {
        // Start with a square matrix with random gaussian elements and make its singular value decomposition UDV'.
        // The V matrix will be the common diagonalizer matrix that we use.
        autoNUMmatrix<double> d (1, dimension, 1, dimension);
        for (long i = 1; i <= dimension; i++) {   // generate the rotation matrix
            for (long j = 1; j <= dimension; j++) {
                d[i][j] = NUMrandomGauss (0, 1);
            }
        }
        autoNUMmatrix<double> v (1, dimension, 1, dimension);
        autoSVD svd = SVD_create_d (d.peek(), dimension, dimension);
        autoCrossCorrelationTables me = CrossCorrelationTables_create ();
        for (long i = 1; i <= dimension; i++) {
            for (long j = 1; j <= dimension; j++) {
                d[i][j] = 0;
            }
        }
        // Start with a diagonal matrix D and calculate V'DV
        for (long k = 1; k <= n; k++) {
            autoCrossCorrelationTable ct = CrossCorrelationTable_create (dimension);
            double low = k == 1 && firstPositiveDefinite ? 0.1 : -1;
            for (long i = 1; i <= dimension; i++) {
                d[i][i] = NUMrandomUniform (low, 1);
            }
            for (long i = 1; i <= dimension; i++) {
                for (long j = 1; j <= dimension; j++) {
                    v[i][j] = NUMrandomGauss (svd -> v[i][j], sigma);
                }
            }
            // we need V'DV, however our V has eigenvectors row-wise -> VDV'
            NUMdmatrices_multiply_VCVp (ct -> data, v.peek(), dimension, dimension, d.peek(), 1);
            Collection_addItem_move (me.peek(), ct.move());
        }
        return me;
    } catch (MelderError) {
        Melder_throw (U"CrossCorrelationTables test set not created.");
    }
}
void Minimizer_reset (Minimizer me, const double guess[]) {
    if (guess) {
        for (long i = 1; i <= my nParameters; i++) {
            my p[i] = guess[i];
        }
    } else {
        for (long i = 1; i <= my nParameters; i++) {
            my p[i] = NUMrandomUniform (-1, 1);
        }
    }
    /* Don't use NUMdvector_free: realloc in Minimizer_minimize */
    if (my history != 0) {
        my history++;
        Melder_free (my history);
    }
    my maxNumOfIterations = my success = my funcCalls = my iteration = 0;
    my minimum = 1.0e38;
    my v_reset ();
}
double testCosineTransform (long n) {
    try {
        autoNUMvector<double> x (1, n);
        autoNUMvector<double> y (1, n);
        autoNUMvector<double> x2 (1, n);
        autoNUMmatrix<double> cosinesTable (NUMcosinesTable (n), 1, 1);
        for (long i = 1; i <= n; i++) {
            x[i] = NUMrandomUniform (0, 70);
        }
        NUMcosineTransform (x.peek(), y.peek(), n, cosinesTable.peek());
        NUMinverseCosineTransform (y.peek(), x2.peek(), n, cosinesTable.peek());
        double delta = 0;
        for (long i = 1; i <= n; i++) {
            double dif = x[i] - x2[i];
            delta += dif * dif;
        }
        delta = sqrt (delta);
        return delta;
    } catch (MelderError) {
        Melder_throw ("Test cosine transform error");
    }
}
long FFNet_getWinningUnit (FFNet me, int labeling) {
    long pos = 1, k = my nNodes - my nOutputs;
    if (labeling == 2) {   /* stochastic */
        double sum = 0.0;
        for (long i = 1; i <= my nOutputs; i++) {
            sum += my activity[k + i];
        }
        double random = NUMrandomUniform (0.0, sum);
        for (pos = my nOutputs; pos >= 2; pos--) {
            if (random > (sum -= my activity[k + pos])) {
                break;
            }
        }
    } else {   /* winner-takes-all */
        double max = my activity[k + 1];
        for (long i = 2; i <= my nOutputs; i++)
            if (my activity[k + i] > max) { max = my activity[k + i]; pos = i; }
    }
    return pos;
}
autoCategories PatternList_to_Categories_cluster (
    ///////////////////////////////
    // Parameters                //
    ///////////////////////////////
    PatternList p,          // source                              //
    FeatureWeights fws,     // feature weights                     //
    long k,                 // k(!)                                //
    double s,               // clustersize constraint 0 < s <= 1   //
    long m                  // reseed maximum                      //
) {
    autoCategories categories = Categories_createWithSequentialNumbers (k);
    if (k == p->ny)
        return categories;
    autoKNN knn = KNN_create();
    if (p -> ny % k)
        if (s > (double) (p -> ny / k) / (double) (p -> ny / k + 1))   // FIXME check whether integer division is correct
            s = (double) (p -> ny / k) / (double) (p -> ny / k + 1);
    double progress = m;
    autoNUMvector <double> sizes (0L, k);
    autoNUMvector <long> seeds (0L, k);
    autoPatternList centroids = PatternList_create (k, p -> nx);
    autoNUMvector <double> beta (0L, centroids -> nx);
    do {
        double delta;
        long nfriends = 0;
        Melder_progress (1 - (progress - m) / progress, U"");
        for (long y = 1; y <= centroids->ny; y++) {
            int ifriend = 1;
            long ys = (long) lround (NUMrandomUniform (1, p->ny));
            if (nfriends) {
                while (ifriend) {
                    ys = (long) lround (NUMrandomUniform (1, p->ny));
                    for (long fc = 0; fc < nfriends; fc++) {
                        ifriend = 0;
                        Melder_assert (fc < k);
                        if (seeds [fc] == ys) {
                            ifriend = 1;
                            break;
                        }
                    }
                }
            }
            Melder_assert (nfriends <= k);
            seeds [nfriends++] = ys;
            for (long x = 1; x <= centroids->nx; x++)
                centroids->z[y][x] = p->z[ys][x];
        }
        do {
            delta = 0;
            KNN_learn (knn.get(), centroids.get(), categories.get(), kOla_REPLACE, kOla_SEQUENTIAL);
            autoCategories interim = KNN_classifyToCategories (knn.get(), p, fws, 1, kOla_FLAT_VOTING);
            for (long x = 1; x <= k; x ++)
                sizes [x] = 0;
            for (long yp = 1; yp <= categories->size; yp ++) {
                double alfa = 1;
                Melder_assert (yp <= centroids -> ny);
                for (long x = 1; x <= centroids -> nx; x ++) {
                    beta [x] = centroids -> z [yp] [x];
                }
                for (long ys = 1; ys <= interim->size; ys ++) {
                    if (FeatureWeights_areFriends (categories->at [yp], interim->at [ys])) {
                        for (long x = 1; x <= p -> nx; x ++) {
                            Melder_assert (ys <= p -> ny);
                            if (alfa == 1) {
                                centroids -> z [yp] [x] = p -> z [ys] [x];
                            } else {
                                centroids -> z [yp] [x] += (p -> z [ys] [x] - centroids -> z [yp] [x]) / alfa;
                            }
                        }
                        Melder_assert (yp <= k);
                        sizes [yp] ++;
                        alfa ++;
                    }
                }
                for (long x = 1; x <= centroids -> nx; x ++) {
                    delta += fabs (beta [x] - centroids -> z [yp] [x]);
                }
            }
        } while (delta != 0.0);
        double smax = sizes [1];
        double smin = sizes [1];
        for (long x = 1; x <= k; x++) {
            if (smax < sizes [x]) smax = sizes [x];
            if (smin > sizes [x]) smin = sizes [x];
        }
        sizes [0] = smin / smax;
        -- m;
    } while (sizes[0] < s && m > 0);
    autoCategories output = KNN_classifyToCategories (knn.get(), p, fws, 1, kOla_FLAT_VOTING);
    return output;
}
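/*
 * The seeding step above keeps redrawing lround (NUMrandomUniform (1, p->ny)) until it finds a row
 * that is not already a seed. A standalone sketch of the same goal -- k distinct 1-based row
 * indices -- can simply shuffle all candidates and take the first k (illustrative only; assumed
 * names, <random> instead of NUMrandomUniform, and k <= numberOfRows as the caller guarantees).
 */
#include <algorithm>
#include <numeric>
#include <random>
#include <vector>

std::vector<long> drawDistinctSeedRows (long numberOfRows, long k, std::mt19937 &rng) {
    std::vector<long> rows (numberOfRows);
    std::iota (rows.begin (), rows.end (), 1L);   // 1-based row numbers, as in the code above
    std::shuffle (rows.begin (), rows.end (), rng);
    rows.resize (k);   // keep the first k shuffled rows as seeds
    return rows;
}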
autoSound Sound_Point_Pitch_Duration_to_Sound (Sound me, PointProcess pulses, PitchTier pitch, DurationTier duration, double maxT) {
    try {
        long ipointleft, ipointright;
        double deltat = 0, handledTime = my xmin;
        double startOfSourceNoise, endOfSourceNoise, startOfTargetNoise, endOfTargetNoise;
        double durationOfSourceNoise, durationOfTargetNoise;
        double startOfSourceVoice, endOfSourceVoice, startOfTargetVoice, endOfTargetVoice;
        double durationOfSourceVoice, durationOfTargetVoice;
        double startingPeriod, finishingPeriod, ttarget, voicelessPeriod;
        if (duration -> points.size == 0)
            Melder_throw (U"No duration points.");
        /*
         * Create a Sound long enough to hold the longest possible duration-manipulated sound.
         */
        autoSound thee = Sound_create (1, my xmin, my xmin + 3 * (my xmax - my xmin), 3 * my nx, my dx, my x1);
        /*
         * Below, I'll abbreviate the voiced interval as "voice" and the voiceless interval as "noise".
         */
        if (pitch && pitch -> points.size) for (ipointleft = 1; ipointleft <= pulses -> nt; ipointleft = ipointright + 1) {
            /*
             * Find the beginning of the voice.
             */
            startOfSourceVoice = pulses -> t [ipointleft];   // the first pulse of the voice
            startingPeriod = 1.0 / RealTier_getValueAtTime (pitch, startOfSourceVoice);
            startOfSourceVoice -= 0.5 * startingPeriod;   // the first pulse is in the middle of a period
            /*
             * Measure one noise.
             */
            startOfSourceNoise = handledTime;
            endOfSourceNoise = startOfSourceVoice;
            durationOfSourceNoise = endOfSourceNoise - startOfSourceNoise;
            startOfTargetNoise = startOfSourceNoise + deltat;
            endOfTargetNoise = startOfTargetNoise + RealTier_getArea (duration, startOfSourceNoise, endOfSourceNoise);
            durationOfTargetNoise = endOfTargetNoise - startOfTargetNoise;
            /*
             * Copy the noise.
             */
            voicelessPeriod = NUMrandomUniform (0.008, 0.012);
            ttarget = startOfTargetNoise + 0.5 * voicelessPeriod;
            while (ttarget < endOfTargetNoise) {
                double tsource;
                double tleft = startOfSourceNoise, tright = endOfSourceNoise;
                int i;
                for (i = 1; i <= 15; i ++) {
                    double tsourcemid = 0.5 * (tleft + tright);
                    double ttargetmid = startOfTargetNoise + RealTier_getArea (duration, startOfSourceNoise, tsourcemid);
                    if (ttargetmid < ttarget) tleft = tsourcemid;
                    else tright = tsourcemid;
                }
                tsource = 0.5 * (tleft + tright);
                copyBell (me, tsource, voicelessPeriod, voicelessPeriod, thee.get(), ttarget);
                voicelessPeriod = NUMrandomUniform (0.008, 0.012);
                ttarget += voicelessPeriod;
            }
            deltat += durationOfTargetNoise - durationOfSourceNoise;
            /*
             * Find the end of the voice.
             */
            for (ipointright = ipointleft + 1; ipointright <= pulses -> nt; ipointright ++)
                if (pulses -> t [ipointright] - pulses -> t [ipointright - 1] > maxT)
                    break;
            ipointright --;
            endOfSourceVoice = pulses -> t [ipointright];   // the last pulse of the voice
            finishingPeriod = 1.0 / RealTier_getValueAtTime (pitch, endOfSourceVoice);
            endOfSourceVoice += 0.5 * finishingPeriod;   // the last pulse is in the middle of a period
            /*
             * Measure one voice.
             */
            durationOfSourceVoice = endOfSourceVoice - startOfSourceVoice;
            /*
             * This will be copied to an interval with a different location and duration.
             */
            startOfTargetVoice = startOfSourceVoice + deltat;
            endOfTargetVoice = startOfTargetVoice + RealTier_getArea (duration, startOfSourceVoice, endOfSourceVoice);
            durationOfTargetVoice = endOfTargetVoice - startOfTargetVoice;
            /*
             * Copy the voiced part.
             */
            ttarget = startOfTargetVoice + 0.5 * startingPeriod;
            while (ttarget < endOfTargetVoice) {
                double tsource, period;
                long isourcepulse;
                double tleft = startOfSourceVoice, tright = endOfSourceVoice;
                int i;
                for (i = 1; i <= 15; i ++) {
                    double tsourcemid = 0.5 * (tleft + tright);
                    double ttargetmid = startOfTargetVoice + RealTier_getArea (duration, startOfSourceVoice, tsourcemid);
                    if (ttargetmid < ttarget) tleft = tsourcemid;
                    else tright = tsourcemid;
                }
                tsource = 0.5 * (tleft + tright);
                period = 1.0 / RealTier_getValueAtTime (pitch, tsource);
                isourcepulse = PointProcess_getNearestIndex (pulses, tsource);
                copyBell2 (me, pulses, isourcepulse, period, period, thee.get(), ttarget, maxT);
                ttarget += period;
            }
            deltat += durationOfTargetVoice - durationOfSourceVoice;
            handledTime = endOfSourceVoice;
        }
        /*
         * Copy the remaining unvoiced part, if we are at the end.
         */
        startOfSourceNoise = handledTime;
        endOfSourceNoise = my xmax;
        durationOfSourceNoise = endOfSourceNoise - startOfSourceNoise;
        startOfTargetNoise = startOfSourceNoise + deltat;
        endOfTargetNoise = startOfTargetNoise + RealTier_getArea (duration, startOfSourceNoise, endOfSourceNoise);
        durationOfTargetNoise = endOfTargetNoise - startOfTargetNoise;
        voicelessPeriod = NUMrandomUniform (0.008, 0.012);
        ttarget = startOfTargetNoise + 0.5 * voicelessPeriod;
        while (ttarget < endOfTargetNoise) {
            double tsource;
            double tleft = startOfSourceNoise, tright = endOfSourceNoise;
            for (int i = 1; i <= 15; i ++) {
                double tsourcemid = 0.5 * (tleft + tright);
                double ttargetmid = startOfTargetNoise + RealTier_getArea (duration, startOfSourceNoise, tsourcemid);
                if (ttargetmid < ttarget) tleft = tsourcemid;
                else tright = tsourcemid;
            }
            tsource = 0.5 * (tleft + tright);
            copyBell (me, tsource, voicelessPeriod, voicelessPeriod, thee.get(), ttarget);
            voicelessPeriod = NUMrandomUniform (0.008, 0.012);
            ttarget += voicelessPeriod;
        }
        /*
         * Find the number of trailing zeroes and hack the sound's time domain.
         */
        thy xmax = thy xmin + RealTier_getArea (duration, my xmin, my xmax);
        if (fabs (thy xmax - my xmax) < 1e-12)
            thy xmax = my xmax;   // common situation
        thy nx = Sampled_xToLowIndex (thee.get(), thy xmax);
        if (thy nx > 3 * my nx)
            thy nx = 3 * my nx;
        return thee;
    } catch (MelderError) {
        Melder_throw (me, U": not manipulated.");
    }
}
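/*
 * The three "for (i = 1; i <= 15; i ++)" loops in the function above all perform the same task:
 * invert the monotone time warp t -> startOfTarget + RealTier_getArea (duration, startOfSource, t)
 * by bisection, so that a target time can be mapped back to a source time. A minimal generic
 * sketch of that bisection (assumed names; `warp` stands for any monotone non-decreasing function
 * bracketed by [tleft, tright]):
 */
#include <functional>

double invertMonotoneByBisection (const std::function<double (double)> &warp, double target, double tleft, double tright) {
    for (int i = 1; i <= 15; i ++) {   // 15 halvings shrink the bracket by a factor of 2^15
        double tmid = 0.5 * (tleft + tright);
        if (warp (tmid) < target)
            tleft = tmid;
        else
            tright = tmid;
    }
    return 0.5 * (tleft + tright);
}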
long KNN_prune_prune (
    ///////////////////////////////
    // Parameters                //
    ///////////////////////////////
    KNN me,       // the classifier to be pruned                 //
    double n,     // pruning degree: noise, 0 <= n <= 1          //
    double r,     // pruning redundancy: noise, 0 <= n <= 1      //
    long k        // k(!)                                        //
) {
    Categories uniqueCategories = Categories_selectUniqueItems (my output, 1);
    if (Categories_getSize (uniqueCategories) == my nInstances)
        return 0;
    long removals = 0;
    long ncandidates = 0;
    long *candidates = NUMlvector (0, my nInstances - 1);
    double progress = 1 / (double) my nInstances;
    if (my nInstances <= 1)
        return 0;
    for (long y = 1; y <= my nInstances; y++) {
        if (! Melder_progress1 (1 - (double) y * progress, L"Pruning noisy instances"))
            return removals;
        if (KNN_prune_noisy (my input, my output, y, k)) {
            if (n == 1 || NUMrandomUniform (0, 1) <= n) {
                KNN_removeInstance (me, y);
                ++removals;
            }
        }
    }
    Melder_progress1 (1.0, NULL);
    for (long y = 1; y <= my nInstances; ++y) {
        if (! Melder_progress1 (1 - (double) y * progress, L"Identifying superfluous and critical instances"))
            return removals;
        if (KNN_prune_superfluous (my input, my output, y, k, 0) && ! KNN_prune_critical (my input, my output, y, k))
            candidates[ncandidates++] = y;
    }
    Melder_progress1 (1.0, NULL);
    KNN_prune_sort (my input, my output, k, candidates, ncandidates);
    progress = 1 / (double) ncandidates;
    for (long y = 0; y < ncandidates; ++y) {
        if (! Melder_progress1 (1 - (double) y * progress, L"Pruning superfluous non-critical instances"))
            return removals;
        if (KNN_prune_superfluous (my input, my output, candidates[y], k, 0) && ! KNN_prune_critical (my input, my output, candidates[y], k)) {
            if (r == 1 || NUMrandomUniform (0, 1) <= r) {
                KNN_removeInstance (me, candidates[y]);
                for (long i = y + 1; i < ncandidates; ++i)
                    if (candidates[i] > candidates[y])
                        --candidates[i];
                ++removals;
            }
        }
    }
    Melder_progress1 (1.0, NULL);
    NUMlvector_free (candidates, 0);
    return removals;
}
Categories Pattern_to_Categories_cluster (
    ///////////////////////////////
    // Parameters                //
    ///////////////////////////////
    Pattern p,              // source                              //
    FeatureWeights fws,     // feature weights                     //
    long k,                 // k(!)                                //
    double s,               // clustersize constraint 0 < s <= 1   //
    long m                  // reseed maximum                      //
) {
    Categories categories = NULL, output = NULL;
    KNN knn = NULL;
    Pattern centroids = NULL;
    double *sizes = NULL, *beta = NULL;
    long *seeds = NULL;
    categories = Categories_sequentialNumbers (k); cherror
    if (k == p->ny)
        return categories;
    knn = KNN_create(); cherror
    if (p->ny % k)
        if (s > (double) (p->ny / k) / (double) (p->ny / k + 1))
            s = (double) (p->ny / k) / (double) (p->ny / k + 1);
    double progress = m;
    sizes = NUMdvector (0, k); cherror
    seeds = NUMlvector (0, k); cherror
    centroids = Pattern_create (k, p->nx);
    beta = NUMdvector (0, centroids->nx);
    do {
        double delta;
        long nfriends = 0;
        if (! Melder_progress1 (1 - (progress - m) / progress, L""))
            break;
        for (long y = 1; y <= centroids->ny; y++) {
            int friend = 1;
            long ys = (long) lround (NUMrandomUniform (1, p->ny));
            if (nfriends) {
                while (friend) {
                    ys = (long) lround (NUMrandomUniform (1, p->ny));
                    for (long fc = 0; fc < nfriends; fc++) {
                        friend = 0;
                        Melder_assert (fc < k);
                        if (seeds [fc] == ys) {
                            friend = 1;
                            break;
                        }
                    }
                }
            }
            Melder_assert (nfriends <= k);
            seeds [nfriends++] = ys;
            for (long x = 1; x <= centroids->nx; x++)
                centroids->z[y][x] = p->z[ys][x];
        }
        do {
            delta = 0;
            KNN_learn (knn, centroids, categories, kOla_REPLACE, kOla_SEQUENTIAL);
            Categories interim = KNN_classifyToCategories (knn, p, fws, 1, kOla_FLAT_VOTING);
            for (long x = 1; x <= k; x++)
                sizes [x] = 0;
            for (long yp = 1; yp <= categories->size; yp++) {
                double alfa = 1;
                Melder_assert (yp <= centroids->ny);
                for (long x = 1; x <= centroids->nx; x++) {
                    beta[x] = centroids->z[yp][x];
                }
                for (long ys = 1; ys <= interim->size; ys++) {
                    if (FRIENDS(categories->item[yp], interim->item[ys])) {
                        for (long x = 1; x <= p->nx; x++) {
                            Melder_assert (ys <= p->ny);
                            if (alfa == 1) {
                                centroids->z[yp][x] = p->z[ys][x];
                            } else {
                                centroids->z[yp][x] += (p->z[ys][x] - centroids->z[yp][x]) / alfa;
                            }
                        }
                        Melder_assert (yp <= k);
                        sizes [yp] ++;
                        alfa++;
                    }
                }
                for (long x = 1; x <= centroids->nx; x++) {
                    delta += fabs (beta[x] - centroids->z[yp][x]);
                }
            }
            forget (interim);
        } while (delta);
        double smax = sizes [1];
        double smin = sizes [1];
        for (long x = 1; x <= k; x++) {
            if (smax < sizes [x]) smax = sizes [x];
            if (smin > sizes [x]) smin = sizes [x];
        }
        sizes [0] = smin / smax;
        --m;
    } while (sizes[0] < s && m > 0);
    Melder_progress1 (1.0, NULL);
    output = KNN_classifyToCategories (knn, p, fws, 1, kOla_FLAT_VOTING); cherror
end:
    forget (centroids);
    forget (categories);
    forget (knn);
    NUMdvector_free (sizes, 0);
    NUMdvector_free (beta, 0);
    NUMlvector_free (seeds, 0);
    iferror return NULL;
    return output;
}
autoFeatureWeights FeatureWeights_computeWrapperExt (
    ///////////////////////////////
    // Parameters                //
    ///////////////////////////////
    KNN nn,            // Classifier              //
    PatternList pp,    // test pattern            //
    Categories c,      // test categories         //
    long k,            // k(!)                    //
    int d,             // distance weighting      //
    long nseeds,       // the number of seeds     //
    double alfa,       // shrinkage factor        //
    double stop,       // stop at                 //
    int mode           // mode (co/serial)        //
) {
    if (! nn)
        return autoFeatureWeights();
    try {
        double pivot = 0.5;
        double range = 0.5;
        autoNUMvector <double> results (0L, nseeds);
        autoThingVector <structFeatureWeights> cs (0L, nseeds);
        for (long y = 0; y <= nseeds; y++) {
            cs [y] = FeatureWeights_create (pp -> nx);
        }
        for (long x = 1; x <= pp -> nx; x ++)
            cs [nseeds] -> fweights -> data [1] [x] = pivot;
        results [nseeds] = FeatureWeights_evaluate (cs [nseeds].get(), nn, pp, c, k, d);
        while (range > 0 && results [nseeds] < stop) {
            long best = nseeds;
            if (mode == 2) {
                for (long x = 1; x <= pp->nx; x++) {
                    for (long y = 0; y < nseeds; y++) {
                        cs[y]->fweights->data[1][x] =
                            NUMrandomUniform (OlaMAX (0, (cs[nseeds]->fweights)->data[1][x] - range),
                                OlaMIN (1, cs[nseeds]->fweights->data[1][x] + range));
                        results[y] = FeatureWeights_evaluate (cs[y].get(), nn, pp, c, k, d);
                    }
                    for (long q = 0; q < nseeds; q++)
                        if (results[q] > results[best]) best = q;
                    if (results[best] > results[nseeds]) {
                        for (long x = 1; x <= pp->nx; x++)   // BUG: a loop over x inside a loop over x; just hope mode is never 2
                            cs[nseeds]->fweights->data[1][x] = cs[best]->fweights->data[1][x];
                        results[nseeds] = results[best];
                    }
                }
            } else {
                for (long y = 0; y < nseeds; y++) {
                    for (long x = 1; x <= pp->nx; x++) {
                        cs[y]->fweights->data[1][x] =
                            NUMrandomUniform (OlaMAX (0, cs[nseeds]->fweights->data[1][x] - range),
                                OlaMIN (1, cs[nseeds]->fweights->data[1][x] + range));
                    }
                    results[y] = FeatureWeights_evaluate (cs [y].get(), nn, pp, c, k, d);
                }
                for (long q = 0; q < nseeds; q++)
                    if (results[q] > results[best]) best = q;
                if (results[best] > results[nseeds]) {
                    for (long x = 1; x <= pp->nx; x++)
                        cs[nseeds]->fweights->data[1][x] = cs[best]->fweights->data[1][x];
                    results[nseeds] = results[best];
                }
            }
            range -= alfa;
        }
        autoFeatureWeights result = cs [nseeds].move();
        return result;
    } catch (MelderError) {
        Melder_throw (U"FeatureWeights: wrapper not computed.");
    }
}
autoFeatureWeights FeatureWeights_computeWrapperInt (
    ///////////////////////////////
    // Parameters                //
    ///////////////////////////////
    KNN me,         // Classifier                          //
    long k,         // k(!)                                //
    int d,          // distance weighting                  //
    long nseeds,    // the number of seeds                 //
    double alfa,    // shrinkage factor                    //
    double stop,    // stop at                             //
    int mode,       // mode (co/serial)                    //
    int emode       // evaluation mode (10-fold/L1O)       //
) {
    if (! me)
        return autoFeatureWeights();
    try {
        double pivot = 0.5;
        double range = 0.5;
        autoNUMvector <double> results (0L, nseeds);
        autoThingVector <structFeatureWeights> cs (0L, nseeds);
        for (long y = 0; y <= nseeds; y++) {
            cs [y] = FeatureWeights_create (my input -> nx);
        }
        for (long x = 1; x <= my input -> nx; x ++)
            cs [nseeds] -> fweights -> data [1] [x] = pivot;
        results [nseeds] = KNN_evaluate (me, cs [nseeds].get(), k, d, emode);
        while (range > 0 && results [nseeds] < stop) {
            long best = nseeds;
            if (mode == 2) {
                for (long x = 1; x <= (my input)->nx; x ++) {
                    for (long y = 0; y < nseeds; y ++) {
                        cs[y]->fweights->data[1][x] =
                            NUMrandomUniform (OlaMAX (0, cs[nseeds]->fweights->data[1][x] - range),
                                OlaMIN (1, cs[nseeds]->fweights->data[1][x] + range));
                        results[y] = KNN_evaluate (me, cs [y].get(), k, d, emode);
                    }
                    for (long q = 0; q < nseeds; q++)
                        if (results[q] > results[best]) best = q;
                    if (results[best] > results[nseeds]) {
                        for (long x = 1; x <= (my input)->nx; x++)   // HELP FIXME the same index for an inner and an outer loop!!!
                            cs[nseeds]->fweights->data[1][x] = cs[best]->fweights->data[1][x];
                        results[nseeds] = results[best];
                    }
                }
            } else {
                for (long y = 0; y < nseeds; y++) {
                    for (long x = 1; x <= (my input)->nx; x++) {
                        cs[y]->fweights->data[1][x] =
                            NUMrandomUniform (OlaMAX (0, cs[nseeds]->fweights->data[1][x] - range),
                                OlaMIN (1, cs[nseeds]->fweights->data[1][x] + range));
                    }
                    results[y] = KNN_evaluate (me, cs [y].get(), k, d, emode);
                }
                for (long q = 0; q < nseeds; q++)
                    if (results[q] > results[best]) best = q;
                if (results[best] > results[nseeds]) {
                    for (long x = 1; x <= (my input)->nx; x++)
                        cs[nseeds]->fweights->data[1][x] = cs[best]->fweights->data[1][x];
                    results[nseeds] = results[best];
                }
            }
            range -= alfa;
        }
        autoFeatureWeights result = cs [nseeds].move();
        return result;
    } catch (MelderError) {
        Melder_throw (U"FeatureWeights: wrapper not computed.");
    }
}
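/*
 * Both wrapper functions above implement the same search: keep a current best weight vector
 * (initialized to the 0.5 pivot), draw nseeds candidates uniformly within +/- range of it (clamped
 * to [0, 1]), adopt a candidate if it improves the evaluation, and shrink the range by alfa until
 * it reaches zero or the stop criterion is met. A condensed standalone sketch of that strategy
 * (illustrative names; any scoring callable can stand in for FeatureWeights_evaluate or
 * KNN_evaluate; acceptance here is greedy per candidate rather than best-of-batch).
 */
#include <algorithm>
#include <random>
#include <vector>

template <class Evaluate>   // Evaluate: const std::vector<double> & -> double, higher is better
std::vector<double> shrinkingRangeSearch (long nx, long nseeds, double alfa, double stop,
    Evaluate evaluate, std::mt19937 &rng)
{
    std::vector<double> best (nx, 0.5);   // the "pivot" used above
    double bestScore = evaluate (best);
    for (double range = 0.5; range > 0.0 && bestScore < stop; range -= alfa) {
        for (long seed = 0; seed < nseeds; seed ++) {
            std::vector<double> candidate (nx);
            for (long x = 0; x < nx; x ++) {
                std::uniform_real_distribution<double> weight (
                    std::max (0.0, best [x] - range), std::min (1.0, best [x] + range));
                candidate [x] = weight (rng);
            }
            double score = evaluate (candidate);
            if (score > bestScore) {   // keep the best weights found so far
                bestScore = score;
                best = candidate;
            }
        }
    }
    return best;
}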