TEST(TrigradedIndex, Equality) { TrigradedIndex ind_1(3, 2, 1); TrigradedIndex ind_2(3, 2, 1); EXPECT_EQ(ind_1, ind_2); }
/* used in splyt() above */ static double min_dis(double dys[], int ka, int kb, int ner[]) { double dm = 0.; for(int k = ka -1; k < kb -1; ++k) { int ner_k = ner[k]; for (int j = k+1; j < kb; ++j) { int k_j = ind_2(ner_k, ner[j]); if (dm < dys[k_j]) dm = dys[k_j]; } } return dm; } /* min_dis */
TEST(TrigradedIndex, LessThan) { TrigradedIndex ind_1(4, -1, 11); TrigradedIndex ind_2(2, 2, 10); TrigradedIndex ind_3(2, 3, 4); TrigradedIndex ind_4(2, 5, 5); TrigradedIndex ind_5(2, 5, 6); EXPECT_LT(ind_1, ind_2); EXPECT_LT(ind_2, ind_3); EXPECT_LT(ind_3, ind_4); EXPECT_LT(ind_4, ind_5); }
/* ----------------------------------------------------------- bswap(): the clustering algorithm in 2 parts: I. build, II. swap */ void bswap(int kk, int n, int *nrepr, Rboolean med_given, Rboolean do_swap, int trace_lev, /* nrepr[]: here is boolean (0/1): 1 = "is representative object" */ double *dysma, double *dysmb, double *beter, double *dys, double s, double *obj, int *pamonce) { int i, j, ij, k,h, dig_n; double sky; /* Parameter adjustments */ --nrepr; --beter; --dysma; --dysmb; if(trace_lev) Rprintf("pam()'s bswap(*, s=%g, pamonce=%d): ", s, *pamonce); s = s * 1.1 + 1.;// larger than all dys[] (but DBL_MAX is too large) /* IDEA: when n is large compared to k (= kk), * ---- rather use a "sparse" representation: * instead of boolean vector nrepr[] , use ind_repr <- which(nrepr) !! */ for (i = 1; i <= n; ++i) dysma[i] = s; if(med_given) { if(trace_lev) Rprintf("medoids given\n"); /* compute dysma[] : dysma[j] = D(j, nearest_representative) */ for (i = 1; i <= n; ++i) { if (nrepr[i] == 1) for (j = 1; j <= n; ++j) { ij = ind_2(i, j); if (dysma[j] > dys[ij]) dysma[j] = dys[ij]; } } } else { /* ====== first algorithm: BUILD. ====== */ if(trace_lev) Rprintf("build %d medoids:\n", kk); /* find kk representatives aka medoids : */ for (k = 1; k <= kk; ++k) { R_CheckUserInterrupt(); /* compute beter[i] for all non-representatives: * also find ammax := max_{..} and nmax := argmax_i{beter[i]} ... */ int nmax = -1; /* -Wall */ double ammax, cmd; ammax = 0.; for (i = 1; i <= n; ++i) { if (nrepr[i] == 0) { beter[i] = 0.; for (j = 1; j <= n; ++j) { cmd = dysma[j] - dys[ind_2(i, j)]; if (cmd > 0.) beter[i] += cmd; } if (ammax <= beter[i]) { /* does < (instead of <= ) work too? -- NO! */ ammax = beter[i]; nmax = i; } } } nrepr[nmax] = 1;/* = .true. : found new representative */ if (trace_lev >= 2) Rprintf(" new repr. %d\n", nmax); /* update dysma[] : dysma[j] = D(j, nearest_representative) */ for (j = 1; j <= n; ++j) { ij = ind_2(nmax, j); if (dysma[j] > dys[ij]) dysma[j] = dys[ij]; } } /* output of the above loop: nrepr[], dysma[], ... */ } if(trace_lev) /* >= 2 (?) */ { dig_n = 1+floor(log10(n)); Rprintf(" after build: medoids are"); for (i = 1; i <= n; ++i) if(nrepr[i] == 1) Rprintf(" %*d", dig_n, i); if(trace_lev >= 3) { Rprintf("\n and min.dist dysma[1:n] are\n"); for (i = 1; i <= n; ++i) { Rprintf(" %6.3g", dysma[i]); if(i % 10 == 0) Rprintf("\n"); } if(n % 10 != 0) Rprintf("\n"); } else Rprintf("\n"); } else dig_n = 1;// -Wall sky = 0.; for (j = 1; j <= n; ++j) sky += dysma[j]; obj[0] = sky / n; if (do_swap && (kk > 1 || med_given)) { double dzsky; int hbest = -1, nbest = -1, kbest= -1; // -Wall int *medoids, *clustmembership; double *fvect; if(*pamonce) { // add one to use R indices medoids = (int*) R_alloc(kk+1, sizeof(int)); clustmembership = (int*) R_alloc(n+1, sizeof(int)); fvect = (double*) R_alloc(n+1, sizeof(double)); for (int k = 1, i = 1; i <= n; ++i) { if (nrepr[i]) { medoids[k] = i; k++; } } } else { // -Wall : clustmembership = medoids = (int*) NULL; fvect = (double*) NULL; } /* ====== second algorithm: SWAP. ====== */ /* Hmm: In the following, we RE-compute dysma[]; * don't need it first time; then only need *update* after swap */ /*-- Loop : */ L60: if(*pamonce == 0) { // original algorihtm for (j = 1; j <= n; ++j) { /* dysma[j] := D_j d(j, <closest medi>) [KR p.102, 104] * dysmb[j] := E_j d(j, <2-nd cl.medi>) [p.103] */ dysma[j] = s; dysmb[j] = s; for (i = 1; i <= n; ++i) { if (nrepr[i]) { ij = ind_2(i, j); if (dysma[j] > dys[ij]) { dysmb[j] = dysma[j]; dysma[j] = dys[ij]; } else if (dysmb[j] > dys[ij]) { dysmb[j] = dys[ij]; } } } } } else { // *pamonce == 1 or == 2 : for (j = 1; j <= n; ++j) { /* dysma[j] := D_j d(j, <closest medi>) [KR p.102, 104] * dysmb[j] := E_j d(j, <2-nd cl.medi>) [p.103] */ dysma[j] = s; dysmb[j] = s; for(k = 1; k <= kk; k++) { i = medoids[k]; ij = ind_2(i, j); if (dysma[j] > dys[ij]) { //store cluster membership clustmembership[j] = i; dysmb[j] = dysma[j]; dysma[j] = dys[ij]; } else if (dysmb[j] > dys[ij]) { dysmb[j] = dys[ij]; } } } } dzsky = 1.; /* 1 is arbitrary > 0; only dzsky < 0 matters in the end */ if(*pamonce == 0) { // original algorihtm for (h = 1; h <= n; ++h) if (!nrepr[h]) { R_CheckUserInterrupt(); for (i = 1; i <= n; ++i) if (nrepr[i]) { double dz = 0.; /* dz := T_{ih} := sum_j C_{jih} [p.104] : */ for (j = 1; j <= n; ++j) { /* if (!nrepr[j]) { */ int hj = ind_2(h, j); ij = ind_2(i, j); if (dys[ij] == dysma[j]) { double small = dysmb[j] > dys[hj] ? dys[hj] : dysmb[j]; dz += (- dysma[j] + small); } else if (dys[hj] < dysma[j]) /* 1c. */ dz += (- dysma[j] + dys[hj]); } if (dzsky > dz) { dzsky = dz; /* dzsky := min_{i,h} T_{i,h} */ hbest = h; nbest = i; } } } } else { // *pamonce == 1 or == 2 : for(k = 1; k <= kk; k++) { R_CheckUserInterrupt(); i=medoids[k]; double removeCost = 0.; //Compute cost for removing the medoid for (j = 1; j <= n; ++j) { if(clustmembership[j] == i) { removeCost+=(dysmb[j]-dysma[j]); fvect[j]=dysmb[j]; } else{ fvect[j]=dysma[j]; } } if (*pamonce == 1) { // Now check possible new medoids h for (h = 1; h <= n; ++h) if (!nrepr[h]) { double addGain = removeCost; // Compute gain of adding h as a medoid: for (j = 1; j <= n; ++j) { int hj = ind_2(h, j); if(dys[hj] < fvect[j]) addGain += (dys[hj]-fvect[j]); } if (dzsky > addGain) { dzsky = addGain; /* dzsky := min_{i,h} T_{i,h} */ hbest = h; nbest = i; kbest = k; } } } else { // *pamonce == 2 : // Now check possible new medoids h for (h = 1; h <= n; ++h) if (!nrepr[h]) { double addGain = removeCost - fvect[h]; // - fvect[h] since dys[h,h]=0; // Compute gain of adding h as a medoid: int ijbase = (h-2)*(h-1)/2; for (j = 1; j < h; ++j) { int hj = ijbase+j; if(dys[hj] < fvect[j]) addGain += (dys[hj]-fvect[j]); } ijbase += h;// = (h-2)*(h-1)/2 + h for (j = h+1; j <= n; ++j) { ijbase += j-2; if(dys[ijbase] < fvect[j]) addGain += (dys[ijbase]-fvect[j]); } if (dzsky > addGain) { dzsky = addGain; /* dzsky := min_{i,h} T_{i,h} */ hbest = h; nbest = i; kbest = k; } } } } } if (dzsky < - 16*DBL_EPSILON * fabs(sky)) { // basically " < 0 ", // but ' < 0 ' gave infinite loop, swapping the identical objects // found an improving swap if(trace_lev >= 2) Rprintf( " swp new %*d <-> %*d old; decreasing diss. %7g by %g\n", dig_n, hbest, dig_n, nbest, sky, dzsky); nrepr[hbest] = 1; nrepr[nbest] = 0; if(*pamonce) medoids[kbest]=hbest; sky += dzsky; goto L60; } } obj[1] = sky / n; } /* bswap */
/* ----------------------------------------------------------- Compute Silhouette Information : */ void dark(int kk, int nn, int *ncluv, int *nsend, int *nelem, int *negbr, double *syl, double *srank, double *avsyl, double *ttsyl, double *dys, double *s, double *sylinf) { int k, nsylr; /* pointers to sylinf[] columns -- sylinf[nn, 4] : */ double *sylinf_2, *sylinf_3, *sylinf_4; sylinf_2 = sylinf + nn; sylinf_3 = sylinf_2 + nn; sylinf_4 = sylinf_3 + nn; /* Parameter adjustments */ --avsyl; --ncluv; nsylr = 0; *ttsyl = 0.; for (k = 1; k <= kk; ++k) { /* nelem[0:(ntt-1)] := indices (1-based) of obs. in cluster k : */ int j,l, ntt = 0; for (j = 1; j <= nn; ++j) { if (ncluv[j] == k) { nelem[ntt] = j; ++ntt; } } for (j = 0; j < ntt; ++j) {/* (j+1)-th obs. in cluster k */ int k_, nj = nelem[j]; double dysb = *s * 1.1 + 1.; negbr[j] = -1; /* for all clusters k_ != k : */ for (k_ = 1; k_ <= kk; ++k_) if (k_ != k) { double db = 0.; int nbb = 0; for (l = 1; l <= nn; ++l) if (ncluv[l] == k_) { ++nbb; if (l != nj) db += dys[ind_2(nj, l)]; } db /= nbb; /* now db(k_) := mean( d[j, l]; l in C_{k_} ) */ if (dysb > db) { dysb = db; negbr[j] = k_; } }/* negbr[j] := arg max_{k_} db(k_) */ if (ntt > 1) { double dysa = 0.; for (l = 0; l < ntt; ++l) { int nl = nelem[l]; if (nj != nl) dysa += dys[ind_2(nj, nl)]; } dysa /= ntt - 1; if (dysa > 0.) { if (dysb > 0.) { if (dysb > dysa) syl[j] = 1. - dysa / dysb; else if (dysb < dysa) syl[j] = dysb / dysa - 1.; else /* dysb == dysa: */ syl[j] = 0.; if (syl[j] < -1.) syl[j] = -1.; else if (syl[j] > 1.) syl[j] = 1.; } else { syl[j] = -1.; } } else /* dysa == 0 */ if (dysb > 0.) syl[j] = 1.; else syl[j] = 0.; } else { /* ntt == 1: */ syl[j] = 0.; } } /* for( j ) */ avsyl[k] = 0.; if (ntt == 0) /* this can happen when medoids are user-specified !*/ continue; /* next k */ for (j = 0; j < ntt; ++j) { int lang=-1 /*Wall*/; double symax = -2.; for (l = 0; l < ntt; ++l) { if (symax < syl[l]) { symax = syl[l]; lang = l; } } nsend[j] = lang; srank[j] = symax; /* = syl[lang] */ avsyl[k] += srank[j]; syl[lang] = -3.; } *ttsyl += avsyl[k]; avsyl[k] /= ntt; if (ntt == 1) { sylinf [nsylr] = (double) k; sylinf_2[nsylr] = (double) negbr[0]; sylinf_3[nsylr] = 0.; sylinf_4[nsylr] = (double) nelem[0]; ++nsylr; } else { for (j = 0; j < ntt; ++j) { int lplac = nsend[j]; sylinf [nsylr] = (double) k; sylinf_2[nsylr] = (double) negbr[lplac]; sylinf_3[nsylr] = srank[j]; sylinf_4[nsylr] = (double) nelem[lplac]; ++nsylr; } } } /* for (k) */ *ttsyl /= nn; } /* dark */
/* ----------------------------------------------------------- cstat(): Compute STATistics (numerical output) concerning each partition */ void cstat(int *kk, int *nn, int *nsend, int *nrepr, Rboolean all_stats, double *radus, double *damer, double *avsyl, double *separ, double *s, double *dys, int *ncluv, int *nelem, int *med, int *nisol) { int j, k, ja, jk, nplac, ksmal = -1/* -Wall */; double ss = *s * 1.1 + 1.; /* Parameter adjustments */ --nisol; --med; --nelem; --ncluv; --separ; --avsyl; --damer; --radus; --nrepr; --nsend; /* nsend[j] := i, where x[i,] is the medoid to which x[j,] belongs */ for (j = 1; j <= *nn; ++j) { if (nrepr[j] == 0) { double dsmal = ss; for (k = 1; k <= *nn; ++k) { if (nrepr[k] == 1) { int kj_ = ind_2(k, j); if (dsmal > dys[kj_]) { dsmal = dys[kj_]; ksmal = k; } } } nsend[j] = ksmal; } else { nsend[j] = j; } } /* ncluv[j] := k , the cluster number (k = 1..*kk) */ jk = 1; nplac = nsend[1]; for (j = 1; j <= *nn; ++j) { ncluv[j] = 0; if (nsend[j] == nplac) ncluv[j] = 1; } for (ja = 2; ja <= *nn; ++ja) { nplac = nsend[ja]; if (ncluv[nplac] == 0) { ++jk; for (j = 2; j <= *nn; ++j) { if (nsend[j] == nplac) ncluv[j] = jk; } if (jk == *kk) break; } } if(all_stats) { /* analysis of the clustering. */ int numl; for (k = 1; k <= *kk; ++k) { int ntt = 0, m = -1/* -Wall */; double ttt = 0.; radus[k] = -1.; R_CheckUserInterrupt(); for (j = 1; j <= *nn; ++j) { if (ncluv[j] == k) { double djm; ++ntt; m = nsend[j]; nelem[ntt] = j; djm = dys[ind_2(j, m)]; ttt += djm; if (radus[k] < djm) radus[k] = djm; } } if(ntt == 0) REprintf("bug in C cstat(): ntt=0 !!!\n"); avsyl[k] = ttt / ntt; med[k] = m; } if (*kk == 1) { damer[1] = *s; nrepr[1] = *nn; return; } /* ELSE kk > 1 : */ /* numl = number of L-clusters. */ numl = 0; for (k = 1; k <= *kk; ++k) { /* identification of cluster k: nelem= vector of object indices, nel = number of objects */ int nel = 0; R_CheckUserInterrupt(); for (j = 1; j <= *nn; ++j) { if (ncluv[j] == k) { ++nel; nelem[nel] = j; } } nrepr[k] = nel; if (nel == 1) { int nvn = nelem[1]; damer[k] = 0.; separ[k] = ss; for (j = 1; j <= *nn; ++j) { if (j != nvn) { int mevj = ind_2(nvn, j); if (separ[k] > dys[mevj]) separ[k] = dys[mevj]; } } /* Is cluster k 1) an L-cluster or 2) an L*-cluster ? */ if (separ[k] == 0.) ++numl; } else { /* nel != 1 : */ double dam = -1., sep = ss; Rboolean kand = TRUE; for (ja = 1; ja <= nel; ++ja) { int jb, nvna = nelem[ja]; double aja = -1., ajb = ss; for (jb = 1; jb <= *nn; ++jb) { int jndz = ind_2(nvna, jb); if (ncluv[jb] == k) { if (aja < dys[jndz]) aja = dys[jndz]; } else { if (ajb > dys[jndz]) ajb = dys[jndz]; } } if (kand && aja >= ajb) kand = FALSE; if (dam < aja) dam = aja; if (sep > ajb) sep = ajb; } separ[k] = sep; damer[k] = dam; if (kand) { ++numl; if (dam >= sep) /* L-cluster */ nisol[k] = 1; else/* L*-cluster */ nisol[k] = 2; continue /* k */; } } /* nel = 1 or (!kand) : */ nisol[k] = 0; }/* for(k) */ } /* all_stats */ } /* cstat */
/* AGNES agglomeration */ static void agnes(int nn, int *kwan, int *ner, double *ban, double dys[], int method, double *alpha, int *merge, int trace_lev) { /* VARs */ int n_1 = nn - 1, _d, j, k, la = -1, lb = -1; // -Wall] Rboolean has_a3 = FALSE, has_a4 = FALSE,// is alpha[3] or [4] != 0 -- for Lance-Williams flex_meth = (method == 6 || method == 7); // 6: "flexible": "Flexible Strategy" (K+R p.236 f) extended to 'Lance-Williams' // 7: "gaverage" aka Flexible UPGMA (Belbin et al., 1992) /* Parameter adjustments */ --ban; --ner; --kwan; --alpha; if(trace_lev) { _d = (nn >= 100) ? 3 : (nn >= 10) ? 2 : 1; Rprintf("C agnes(n=%*d, method = %d, ..): ", _d,nn, method); } else _d = -1;// -Wall if(flex_meth) { has_a3 = (alpha[3] != 0.); has_a4 = (alpha[4] != 0.); if(trace_lev) { if(has_a4) Rprintf("|par| = 4, alpha[1:4] = (%g,%g,%g,%g); ", alpha[1],alpha[2],alpha[3],alpha[4]); else if(has_a3) Rprintf("|par| = 3, alpha[1:3] = (%g,%g,%g); ", alpha[1],alpha[2],alpha[3]); } } // Starting with nn clusters, kwan[j] := #{obj} in cluster j for (j = 1; j <= nn; ++j) { kwan[j] = 1; ner[j] = j; } // ---------------------------------------------------------------------------- /* find closest clusters */ if(trace_lev) Rprintf("%d merging steps\n", n_1); for (int nmerge = 0; nmerge < n_1; ++nmerge) { // j := min_j { kwan[j] > 0} = first non-empty cluster (j >= 2) j = 1; do { j++; } while(kwan[j] == 0); if(trace_lev >= 2) Rprintf(" nmerge=%*d, j=%*d, ", _d,nmerge, _d,j); double d_min = dys[ind_2(1, j)] * 1.1f + 1.; for (k = 1; k <= n_1; ++k) if (kwan[k] > 0) { for (j = k + 1; j <= nn; ++j) if (kwan[j] > 0) { int k_j = ind_2(k, j); if (d_min >= dys[k_j]) { // Note: also when "==" ! d_min = dys[k_j]; la = k; lb = j; } } } // --> closest clusters {la < lb} are at distance d_min if(trace_lev >= 2) Rprintf("d_min = D(%*d,%*d) = %#g; ", _d,la, _d,lb, d_min); /* merge-structure for plotting tree in S */ int l1 = -la, l2 = -lb; for (j = 0; j < nmerge; ++j) { if (Merge(j, 1) == l1 || Merge(j, 2) == l1) l1 = j+1; if (Merge(j, 1) == l2 || Merge(j, 2) == l2) l2 = j+1; } Merge(nmerge, 1) = l1; Merge(nmerge, 2) = l2; if(trace_lev >= 3) Rprintf(" -> (%*d,%*d); ", _d,l1, _d,l2); if(flex_meth && l1 == l2) { // can happen with non-sensical (alpha_1,alpha_2,beta, ...) error(_("agnes(method=%d, par.method=*) lead to invalid merge; step %d, D(.,.)=%g"), method, nmerge+1, d_min); } /* determine lfyrs and llast */ int llast = -1, lfyrs = -1; // -Wall for (k = 1; k <= nn; ++k) { if (ner[k] == la) lfyrs = k; if (ner[k] == lb) llast = k; } ban[llast] = d_min; if(trace_lev >= 2) Rprintf("last=%*d;", _d,llast); /* if the two clusters are next to each other, ner must not be changed */ int lnext = lfyrs + kwan[la]; if (lnext != llast) { /* updating ner and ban */ if(trace_lev >= 2) Rprintf(" upd(n,b);"); int lput = lfyrs + kwan[la], lenda = llast + kwan[lb] - 2; for (k = 1; k <= llast - lput; ++k) { int lka = ner[lput]; double akb = ban[lput]; for (j = lput; j <= lenda; ++j) { ner[j] = ner[j + 1]; ban[j] = ban[j + 1]; } ner[lenda+1] = lka; ban[lenda+1] = akb; } } if(trace_lev >= 3) Rprintf("\n"); /* We will merge A & B into A_{new} */ // Calculate new dissimilarities d(q, A_{new}) for (int lq = 1; lq <= nn; ++lq) { // for each other cluster 'q' if (lq == la || lq == lb || kwan[lq] == 0) continue; int naq = ind_2(la, lq); int nbq = ind_2(lb, lq); if(trace_lev >= 3) Rprintf(" old D(A, j), D(B, j), j=%*d = (%g,%g); ", _d,lq, dys[naq], dys[nbq]); switch(method) { case 1: { // 1: unweighted pair-]group average method, UPGMA double ta = (double) kwan[la], tb = (double) kwan[lb], fa = ta / (ta + tb), fb = tb / (ta + tb); dys[naq] = fa * dys[naq] + fb * dys[nbq]; break; } case 2: /* 2: single linkage */ dys[naq] = fmin2(dys[naq], dys[nbq]); break; case 3: /* 3: complete linkage */ dys[naq] = fmax2(dys[naq], dys[nbq]); break; case 4: { // 4: ward's method double ta = (double) kwan[la], tb = (double) kwan[lb], tq = (double) kwan[lq], fa = (ta + tq) / (ta + tb + tq), fb = (tb + tq) / (ta + tb + tq), fc = -tq / (ta + tb + tq); int nab = ind_2(la, lb); dys[naq] = sqrt(fa * dys[naq] * dys[naq] + fb * dys[nbq] * dys[nbq] + fc * dys[nab] * dys[nab]); break; } case 5: /* 5: weighted average linkage */ dys[naq] = (dys[naq] + dys[nbq]) / 2.; break; case 6: { // 6: "Flexible Strategy" (K+R p.236 f) extended to 'Lance-Williams' double dNew = alpha[1] * dys[naq] + alpha[2] * dys[nbq]; if(has_a3) dNew += alpha[3] * dys[ind_2(la, lb)]; if(has_a4) dNew += alpha[4] * fabs(dys[naq] - dys[nbq]); dys[naq] = dNew; /* Lance-Williams would allow alpha(1:2) to *depend* on |cluster| * could also include the extensions of Jambu(1978) -- * See Gordon A.D. (1999) "Classification" (2nd ed.) p.78 ff */ break; } case 7: {/* 7: generalized "average" = Flexible UPGMA (Belbin et al., 1992) * Applies the flexible Lance-Williams formula to the UPGMA, aka * "average" case 1 above, i.e., alpha_{1,2} depend on cluster sizes: */ double ta = (double) kwan[la], tb = (double) kwan[lb], fa = alpha[1] * ta / (ta + tb), fb = alpha[2] * tb / (ta + tb), dNew = fa * dys[naq] + fb * dys[nbq]; if(has_a3) dNew += alpha[3] * dys[ind_2(la, lb)]; if(has_a4) dNew += alpha[4] * fabs(dys[naq] - dys[nbq]); dys[naq] = dNew; break; } default: error(_("invalid method (code %d)"), method); } if(trace_lev >= 3) Rprintf(" new D(A', %*d) = %g\n", _d,lq, dys[naq]); } // for (lq ..) kwan[la] += kwan[lb]; kwan[lb] = 0; if(trace_lev >= 2) Rprintf("%s size(A_new)= %d\n", (trace_lev >= 3)? " --> " : "", kwan[la]); }// for(nmerge ..) return; } /* agnes */
static void splyt(int nn, int *kwan, int *ner, double *ban, double dys[], int stop_at_k, int *merge, int trace_lev) { /* Local variables */ int j, ja, jb, k, l; int jma, jmb, lmm, llq, lmz, lxx, lmma, lmmb, lner, nclu; int lchan, nhalf, n_1 = nn - 1, splyn; /* Parameter adjustments */ --ban; --ner; --kwan; /* initialization */ nclu = 1; nhalf = nn * n_1 / 2 + 1; for (l = 1; l <= nn; ++l) { kwan[l] = 0; ban[l] = 0.; ner[l] = l; } kwan[1] = nn; ja = 1; /* cs := diameter of data set */ double cs = 0.; for(k = 0; k < nhalf; ++k) { if (cs < dys[k]) cs = dys[k]; } if(trace_lev) Rprintf("C diana(): ndist= %d, diameter = %g\n", nhalf, cs); /* prepare for splitting */ //____________ Big Loop _________________________________________________ L30: jb = ja + kwan[ja] - 1; jma = jb; if (kwan[ja] == 2) { // special case of a pair of objects kwan[ja] = 1; kwan[jb] = 1; ban [jb] = dys[ind_2(ner[ja], ner[jb])]; } else { /* finding first object to be shifted */ double bygsd = -1.; int lndsd = -1; for (l = ja; l <= jb; ++l) { lner = ner[l]; double sd = 0.; for (j = ja; j <= jb; ++j) sd += dys[ind_2(lner, ner[j])]; if (bygsd < sd) { bygsd = sd; lndsd = l; } } /* shifting the first object */ --kwan[ja]; kwan[jb] = 1; if (jb != lndsd) { lchan = ner[lndsd]; lmm = jb - 1; for (lmma = lndsd; lmma <= lmm; ++lmma) { lmmb = lmma + 1; ner[lmma] = ner[lmmb]; } ner[jb] = lchan; } splyn = 0; jma = jb - 1; /* finding the next object to be shifted */ do { splyn++; int rest = (jma - ja), jaway = -1; double bdyff = -1.; for (l = ja; l <= jma; ++l) { lner = ner[l]; double da = 0., db = 0.; for (j = ja; j <= jma; ++j) da += dys[ind_2(lner, ner[j])]; da /= rest; for (j = jma + 1; j <= jb; ++j) db += dys[ind_2(lner, ner[j])]; db /= splyn; double dyff = da - db; if (bdyff < dyff) { bdyff = dyff; jaway = l; } } /* end for(l ..) */ jmb = jma + 1; /* shifting the next object when necessary */ if (bdyff <= 0.) break; // out of "object shifting" while(.) loop if (jma != jaway) { lchan = ner[jaway]; lmz = jma - 1; for (lxx = jaway; lxx <= lmz; ++lxx) ner[lxx] = ner[lxx + 1]; ner[jma] = lchan; } for (lxx = jmb; lxx <= jb; ++lxx) { int l_1 = lxx - 1; if (ner[l_1] < ner[lxx]) break; lchan = ner[l_1]; ner[l_1] = ner[lxx]; ner[lxx] = lchan; } --kwan[ja]; kwan[jma] = kwan[jmb] + 1; kwan[jmb] = 0; --jma; jmb = jma + 1; } while (jma != ja); // 200: switch the two parts when necessary if (ner[ja] >= ner[jmb]) { int lxxa = ja; for (int lgrb = jmb; lgrb <= jb; ++lgrb) { ++lxxa; lchan = ner[lgrb]; int lxg = -1; for (int ll = lxxa; ll <= lgrb; ++ll) { int lxf = lgrb - ll + lxxa; lxg = lxf - 1; ner[lxf] = ner[lxg]; } ner[lxg] = lchan; } llq = kwan[jmb]; kwan[jmb] = 0; jma = ja + jb - jma - 1; jmb = jma + 1; kwan[jmb] = kwan[ja]; kwan[ja] = llq; } /* 300 : compute level for banner */ if (nclu == 1) { ban[jmb] = cs; } else { ban[jmb] = min_dis(dys, ja, jb, &ner[1]); } } if (++nclu < nn) { /* continue splitting until all objects are separated */ if (jb != nn) { L420: ja += kwan[ja]; if (ja <= nn) { if (kwan[ja] <= 1) goto L420; else goto L30; } } ja = 1; if (kwan[ja] == 1) goto L420; else goto L30; } //____________ End Big Loop _________________________________________________ /* 500 : merge-structure for plotting tree in S */ for (int nmerge = 0; nmerge < n_1; ++nmerge) { int nj = -1, l1, l2; double dmin = cs; for (j = 2; j <= nn; ++j) { if (kwan[j] >= 0 && dmin >= ban[j]) { dmin = ban[j]; nj = j; } } kwan[nj] = -1; l1 = -ner[nj - 1]; l2 = -ner[nj]; for (j = 0; j < nmerge; ++j) { if (Merge(j, 1) == l1 || Merge(j, 2) == l1) l1 = j+1; if (Merge(j, 1) == l2 || Merge(j, 2) == l2) l2 = j+1; } Merge(nmerge, 1) = l1; Merge(nmerge, 2) = l2; } return; } /* splyt */