// Locates the two entries of this nonleaf node that are closest to each
// other while still being distinct (distance strictly greater than zero),
// using the distance measure selected by Stats->GDtype.
//
//   Stats : supplies Dimension, GDtype and Ftype for the computation.
//   i, j  : outputs; receive the indices (i < j) of the chosen pair.
//           They stay at the default pair (0,1) when no pair has a
//           positive distance below HUGE_DOUBLE.
//
// Returns the fitness (per Stats->Ftype) of the entry obtained by adding
// the two chosen entries together.
//
// Reports through print_error when the node holds fewer than two entries,
// or exactly two entries whose distance is zero.
// NOTE(review): print_error presumably does not return -- confirm.
double Nonleaf::ClosestDiffTwo(Stat *Stats, int &i, int &j) const
{
    Entry merged;
    merged.Init(Stats->Dimension);

    if (actsize<2)
        print_error("Nonleaf::ClosestDiffTwo","Less than 2 entries");

    // With exactly two entries, they must not coincide.
    if (actsize==2) {
        double onlyDist = distance(Stats->GDtype,entry[0],entry[1]);
        if (onlyDist==0)
            print_error("Nonleaf::ClosestDiffTwo",
                        "Same 2 entries in a nonleaf: should not happen");
    }

    // Exhaustive scan over all unordered pairs, keeping the first pair that
    // attains the smallest strictly positive distance.
    int bestFirst = 0;
    int bestSecond = 1;
    double bestDist = HUGE_DOUBLE;

    for (int a=0; a<actsize-1; a++) {
        for (int b=a+1; b<actsize; b++) {
            double cur = distance(Stats->GDtype,entry[a],entry[b]);
            if (cur>0 && cur<bestDist) {
                bestDist = cur;
                bestFirst = a;
                bestSecond = b;
            }
        }
    }

    i = bestFirst;
    j = bestSecond;

    // Fitness of the hypothetical merge of the chosen pair.
    merged.Add(entry[i],entry[j]);
    return merged.Fitness(Stats->Ftype);
}
void Hierarchy0(int &n, // final number of clusters const int K, // final number of clusters Entry **entries, short GDtype, short Ftype, double Ft) { if (n<=1) return; int i, j, imin, jmin, done; short *checked = new short[n]; memset(checked,0,n*sizeof(short)); // 0: unchecked; // -1: exceeds the given threshold if merged with nearest neighbor; // -2: nonexistant after merging. double *dist = new double[n*(n-1)/2]; double d, dmin; Entry tmpent; tmpent.Init((*entries)[0].sx.dim); dmin = HUGE; // compute all initial distances and closest pair for (i=0; i<n-1; i++) for (j=i+1; j<n; j++) { d = distance(GDtype,(*entries)[i],(*entries)[j]); dist[i*n-i*(i+1)/2+j-i-1] = d; if (d<dmin) { dmin = d; imin = i; jmin = j; } } if (K==0) {// ****** case 1 ****** cluster by threshold ft done = FALSE; while (done==FALSE) { tmpent.Add((*entries)[imin],(*entries)[jmin]); if (tmpent.Fitness(Ftype) < Ft) { // within the threshold (*entries)[imin] += (*entries)[jmin]; checked[jmin] = -2; for (i=0; i<imin; i++) { if (checked[i]==0) { dist[i*n-i*(i+1)/2+imin-i-1] = distance(GDtype,(*entries)[i],(*entries)[imin]); }} for (j=imin+1; j<n; j++) { if (checked[j]==0) { dist[imin*n-imin*(imin+1)/2+j-imin-1] = distance(GDtype,(*entries)[imin],(*entries)[j]); }} } else { // exceeds the threshold checked[imin] = -1; checked[jmin] = -1; } done = TRUE; dmin = HUGE; for (i=0; i<n-1; i++) { if (checked[i]==0) { for (j=i+1; j<n; j++) { if (checked[j]==0) { d = dist[i*n-i*(i+1)/2+j-i-1]; if (d<dmin) { done = FALSE; dmin = d; imin = i; jmin = j; }}}}} } // end of while } // end of if else { // ***** case 2 ***** cluster by number k done = n; while (done > K) { (*entries)[imin] += (*entries)[jmin]; checked[jmin] = -2; done--; for (i=0; i<imin; i++) { if (checked[i]==0) { dist[i*n-i*(i+1)/2+imin-i-1] = distance(GDtype,(*entries)[i],(*entries)[imin]); }} for (j=imin+1; j<n; j++) { if (checked[j]==0) { dist[imin*n-imin*(imin+1)/2+j-imin-1] = distance(GDtype,(*entries)[imin],(*entries)[j]); }} dmin = HUGE; for 
(i=0; i<n-1; i++) { if (checked[i]==0) { for (j=i+1; j<n; j++) { if (checked[j]==0) { d = dist[i*n-i*(i+1)/2+j-i-1]; if (d<dmin) { dmin = d; imin = i; jmin = j; }}}}} } // end of while } // end of else j = 0; for (i=0; i<n; i++) if (checked[i]!=-2) { (*entries)[j]=(*entries)[i]; j++; } n=j; delete [] checked; delete [] dist; }