TEST(TrigradedIndex, Equality)
{
  TrigradedIndex ind_1(3, 2, 1);
  TrigradedIndex ind_2(3, 2, 1);

  EXPECT_EQ(ind_1, ind_2);
}
예제 #2
0
파일: twins.c 프로젝트: cran/cluster
/*     used in splyt() above */
static double min_dis(double dys[], int ka, int kb, int ner[])
{
    double dm = 0.;
    for(int k = ka -1; k < kb -1; ++k) {
	int ner_k = ner[k];
	for (int j = k+1; j < kb; ++j) {
	    int k_j = ind_2(ner_k, ner[j]);
	    if (dm < dys[k_j])
		dm = dys[k_j];
	}
    }
    return dm;
} /* min_dis */
TEST(TrigradedIndex, LessThan)
{
  TrigradedIndex ind_1(4, -1, 11);
  TrigradedIndex ind_2(2, 2, 10);
  TrigradedIndex ind_3(2, 3, 4);
  TrigradedIndex ind_4(2, 5, 5);
  TrigradedIndex ind_5(2, 5, 6);

  EXPECT_LT(ind_1, ind_2);
  EXPECT_LT(ind_2, ind_3);
  EXPECT_LT(ind_3, ind_4);
  EXPECT_LT(ind_4, ind_5);
}
예제 #4
0
파일: pam.c 프로젝트: csilles/cxxr
/* -----------------------------------------------------------
     bswap(): the clustering algorithm in 2 parts:  I. build,	II. swap
*/
void bswap(int kk, int n, int *nrepr,
	   Rboolean med_given, Rboolean do_swap, int trace_lev,
	   /* nrepr[]: here is boolean (0/1): 1 = "is representative object"  */
	   double *dysma, double *dysmb, double *beter,
	   double *dys, double s, double *obj, int *pamonce)
{
    int i, j, ij, k,h, dig_n;
    double sky;

    /* Parameter adjustments */
    --nrepr;
    --beter;

    --dysma; --dysmb;

    if(trace_lev) Rprintf("pam()'s bswap(*, s=%g, pamonce=%d): ", s, *pamonce);

    s = s * 1.1 + 1.;// larger than all dys[]  (but DBL_MAX is too large)


/* IDEA: when n is large compared to k (= kk),
 * ----  rather use a "sparse" representation:
 * instead of boolean vector nrepr[] , use  ind_repr <- which(nrepr) !!
 */
    for (i = 1; i <= n; ++i)
	dysma[i] = s;

    if(med_given) {
	if(trace_lev) Rprintf("medoids given\n");

	/* compute dysma[] : dysma[j] = D(j, nearest_representative) */
	for (i = 1; i <= n; ++i) {
	    if (nrepr[i] == 1)
		for (j = 1; j <= n; ++j) {
		    ij = ind_2(i, j);
		    if (dysma[j] > dys[ij])
			dysma[j] = dys[ij];
		}
	}
    }
    else {

/*  ====== first algorithm: BUILD. ====== */

	if(trace_lev) Rprintf("build %d medoids:\n", kk);

	/* find  kk  representatives  aka medoids :  */

	for (k = 1; k <= kk; ++k) {

	    R_CheckUserInterrupt();

	    /* compute beter[i] for all non-representatives:
	     * also find ammax := max_{..} and nmax := argmax_i{beter[i]} ... */
	    int nmax = -1; /* -Wall */
	    double ammax, cmd;
	    ammax = 0.;
	    for (i = 1; i <= n; ++i) {
		if (nrepr[i] == 0) {
		    beter[i] = 0.;
		    for (j = 1; j <= n; ++j) {
			cmd = dysma[j] - dys[ind_2(i, j)];
			if (cmd > 0.)
			    beter[i] += cmd;
		    }
		    if (ammax <= beter[i]) {
			/*  does < (instead of <= ) work too? -- NO! */
			ammax = beter[i];
			nmax = i;
		    }
		}
	    }

	    nrepr[nmax] = 1;/* = .true. : found new representative */
	    if (trace_lev >= 2)
		Rprintf("    new repr. %d\n", nmax);

	    /* update dysma[] : dysma[j] = D(j, nearest_representative) */
	    for (j = 1; j <= n; ++j) {
		ij = ind_2(nmax, j);
		if (dysma[j] > dys[ij])
		    dysma[j] = dys[ij];
	    }
	}
	/* output of the above loop:  nrepr[], dysma[], ... */
    }

    if(trace_lev) /* >= 2 (?) */ {
	dig_n = 1+floor(log10(n));
	Rprintf("  after build: medoids are");
	for (i = 1; i <= n; ++i)
	    if(nrepr[i] == 1) Rprintf(" %*d", dig_n, i);
	if(trace_lev >= 3) {
	    Rprintf("\n  and min.dist dysma[1:n] are\n");
	    for (i = 1; i <= n; ++i) {
		Rprintf(" %6.3g", dysma[i]);
		if(i % 10 == 0) Rprintf("\n");
	    }
	    if(n % 10 != 0) Rprintf("\n");
	} else Rprintf("\n");
    } else dig_n = 1;// -Wall

    sky = 0.;
    for (j = 1; j <= n; ++j)
	sky += dysma[j];
    obj[0] = sky / n;

    if (do_swap && (kk > 1 || med_given)) {

	double dzsky;
	int hbest = -1, nbest = -1, kbest= -1; // -Wall
	int *medoids, *clustmembership;
	double *fvect;
	if(*pamonce) {
	    // add one to use R indices
	    medoids = (int*) R_alloc(kk+1, sizeof(int));
	    clustmembership = (int*) R_alloc(n+1, sizeof(int));
	    fvect = (double*) R_alloc(n+1, sizeof(double));
	    for (int k = 1, i = 1; i <= n; ++i) {
		if (nrepr[i]) {
		    medoids[k] = i;
		    k++;
		}
	    }
	} else { // -Wall :
	    clustmembership = medoids = (int*) NULL;
	    fvect = (double*) NULL;
	}

/* ====== second algorithm: SWAP. ====== */

	/* Hmm: In the following, we RE-compute dysma[];
	 *      don't need it first time; then only need *update* after swap */

/*--   Loop : */
    L60:
	if(*pamonce == 0) { // original algorihtm
	    for (j = 1; j <= n; ++j) {
		/*  dysma[j] := D_j  d(j, <closest medi>)  [KR p.102, 104]
		 *  dysmb[j] := E_j  d(j, <2-nd cl.medi>)  [p.103] */
		dysma[j] = s;
		dysmb[j] = s;
		for (i = 1; i <= n; ++i) {
		    if (nrepr[i]) {
			ij = ind_2(i, j);
			if (dysma[j] > dys[ij]) {
			    dysmb[j] = dysma[j];
			    dysma[j] = dys[ij];
			} else if (dysmb[j] > dys[ij]) {
			    dysmb[j] = dys[ij];
			}
		    }
		}
	    }
	} else { // *pamonce == 1 or == 2 :
	    for (j = 1; j <= n; ++j) {
		/*  dysma[j] := D_j  d(j, <closest medi>)  [KR p.102, 104]
		 *  dysmb[j] := E_j  d(j, <2-nd cl.medi>)  [p.103] */
		dysma[j] = s;
		dysmb[j] = s;
		for(k = 1; k <= kk; k++) {
		    i = medoids[k];
		    ij = ind_2(i, j);
		    if (dysma[j] > dys[ij]) {
			//store cluster membership
			clustmembership[j] = i;
			dysmb[j] = dysma[j];
			dysma[j] = dys[ij];
		    } else if (dysmb[j] > dys[ij]) {
			dysmb[j] = dys[ij];
		    }
		}
	    }
	}

	dzsky = 1.; /* 1 is arbitrary > 0; only dzsky < 0 matters in the end */

	if(*pamonce == 0) { // original algorihtm
	    for (h = 1; h <= n; ++h) if (!nrepr[h]) {
		    R_CheckUserInterrupt();
		    for (i = 1; i <= n; ++i) if (nrepr[i]) {
			    double dz = 0.;
			    /* dz := T_{ih} := sum_j C_{jih}  [p.104] : */
			    for (j = 1; j <= n; ++j) { /* if (!nrepr[j]) { */
				int hj = ind_2(h, j);
				ij = ind_2(i, j);
				if (dys[ij] == dysma[j]) {
				    double small = dysmb[j] > dys[hj] ? dys[hj] : dysmb[j];
				    dz += (- dysma[j] + small);
				} else if (dys[hj] < dysma[j]) /* 1c. */
				    dz += (- dysma[j] + dys[hj]);
			    }
			    if (dzsky > dz) {
				dzsky = dz; /* dzsky := min_{i,h} T_{i,h} */
				hbest = h;
				nbest = i;
			    }
			}
		}
	} else { // *pamonce == 1 or == 2 :
	    for(k = 1; k <= kk; k++) {
		R_CheckUserInterrupt();
		i=medoids[k];
		double removeCost = 0.;
		//Compute cost for removing the medoid
		for (j = 1; j <= n; ++j) {
		    if(clustmembership[j] == i) {
			removeCost+=(dysmb[j]-dysma[j]);
			fvect[j]=dysmb[j];
		    }
		    else{
			fvect[j]=dysma[j];
		    }
		}

		if (*pamonce == 1) {
		    // Now check possible new medoids h
		    for (h = 1; h <= n; ++h) if (!nrepr[h]) {
			    double addGain = removeCost;
			    // Compute gain of adding h as a medoid:
			    for (j = 1; j <= n; ++j) {
				int hj = ind_2(h, j);
				if(dys[hj] < fvect[j])
				    addGain += (dys[hj]-fvect[j]);
			    }
			    if (dzsky > addGain) {
				dzsky = addGain; /* dzsky := min_{i,h} T_{i,h} */
				hbest = h;
				nbest = i;
				kbest = k;
			    }
			}

		} else { // *pamonce == 2 :

		    // Now check possible new medoids h
		    for (h = 1; h <= n; ++h) if (!nrepr[h]) {
			    double addGain = removeCost - fvect[h]; // - fvect[h] since dys[h,h]=0;
			    // Compute gain of adding h as a medoid:
			    int ijbase = (h-2)*(h-1)/2;
			    for (j = 1; j < h; ++j) {
				int hj = ijbase+j;
				if(dys[hj] < fvect[j])
				    addGain += (dys[hj]-fvect[j]);
			    }
			    ijbase += h;// = (h-2)*(h-1)/2 + h
			    for (j = h+1; j <= n; ++j) {
				ijbase += j-2;
				if(dys[ijbase] < fvect[j])
				    addGain += (dys[ijbase]-fvect[j]);
			    }
			    if (dzsky > addGain) {
				dzsky = addGain; /* dzsky := min_{i,h} T_{i,h} */
				hbest = h;
				nbest = i;
				kbest = k;
			    }
			}
		}
	    }
	}

	if (dzsky < - 16*DBL_EPSILON * fabs(sky)) { // basically " < 0 ",
	    // but ' < 0 ' gave infinite loop, swapping the identical objects
	    // found an improving swap

	    if(trace_lev >= 2)
		Rprintf( "   swp new %*d <-> %*d old; decreasing diss. %7g by %g\n",
			 dig_n, hbest, dig_n, nbest, sky, dzsky);
	    nrepr[hbest] = 1;
	    nrepr[nbest] = 0;
	    if(*pamonce)
		medoids[kbest]=hbest;

	    sky += dzsky;
	    goto L60;
	}
    }
    obj[1] = sky / n;
} /* bswap */
예제 #5
0
파일: pam.c 프로젝트: csilles/cxxr
/* -----------------------------------------------------------
     Compute Silhouette Information :
 */
void dark(int kk, int nn, int *ncluv,
	  int *nsend, int *nelem, int *negbr,
	  double *syl, double *srank, double *avsyl, double *ttsyl,
	  double *dys, double *s, double *sylinf)
{
    int k, nsylr;
    /* pointers to sylinf[] columns -- sylinf[nn, 4] : */
    double *sylinf_2, *sylinf_3, *sylinf_4;
    sylinf_2 = sylinf	+ nn;
    sylinf_3 = sylinf_2 + nn;
    sylinf_4 = sylinf_3 + nn;

    /* Parameter adjustments */
    --avsyl;
    --ncluv;

    nsylr = 0;
    *ttsyl = 0.;
    for (k = 1; k <= kk; ++k) {
	/* nelem[0:(ntt-1)] := indices (1-based) of obs. in cluster k : */
	int j,l, ntt = 0;
	for (j = 1; j <= nn; ++j) {
	    if (ncluv[j] == k) {
		nelem[ntt] = j;
		++ntt;
	    }
	}

	for (j = 0; j < ntt; ++j) {/* (j+1)-th obs. in cluster k */
	    int k_, nj = nelem[j];
	    double dysb = *s * 1.1 + 1.;
	    negbr[j] = -1;
	    /* for all clusters  k_ != k : */
	    for (k_ = 1; k_ <= kk; ++k_) if (k_ != k) {
		double db = 0.;
		int nbb = 0;
		for (l = 1; l <= nn; ++l) if (ncluv[l] == k_) {
		    ++nbb;
		    if (l != nj)
			db += dys[ind_2(nj, l)];
		}
		db /= nbb; /* now  db(k_) := mean( d[j, l]; l in C_{k_} ) */
		if (dysb > db) {
		    dysb = db;
		    negbr[j] = k_;
		}
	    }/* negbr[j] := arg max_{k_} db(k_) */
	    if (ntt > 1) {
		double dysa = 0.;
		for (l = 0; l < ntt; ++l) {
		    int nl = nelem[l];
		    if (nj != nl)
			dysa += dys[ind_2(nj, nl)];
		}
		dysa /= ntt - 1;
		if (dysa > 0.) {
		    if (dysb > 0.) {
			if (dysb > dysa)
			    syl[j] = 1. - dysa / dysb;
			else if (dysb < dysa)
			    syl[j] = dysb / dysa - 1.;
			else /* dysb == dysa: */
			    syl[j] = 0.;

			if (syl[j] < -1.)
			    syl[j] = -1.;
			else if (syl[j] > 1.)
			    syl[j] = 1.;

		    } else {
			syl[j] = -1.;
		    }
		}
		else /* dysa == 0 */ if (dysb > 0.)
		    syl[j] = 1.;
		else
		    syl[j] = 0.;
	    }
	    else { /*	  ntt == 1: */
		syl[j] = 0.;
	    }
	} /* for( j ) */
	avsyl[k] = 0.;
	if (ntt == 0) /* this can happen when medoids are user-specified !*/
	    continue; /* next k */

	for (j = 0; j < ntt; ++j) {
	    int lang=-1 /*Wall*/;
	    double symax = -2.;
	    for (l = 0; l < ntt; ++l) {
		if (symax < syl[l]) {
		    symax = syl[l];
		    lang = l;
		}
	    }
	    nsend[j] = lang;
	    srank[j] = symax; /* = syl[lang] */
	    avsyl[k] += srank[j];
	    syl[lang] = -3.;
	}
	*ttsyl += avsyl[k];
	avsyl[k] /= ntt;
	if (ntt == 1) {
	    sylinf  [nsylr] = (double) k;
	    sylinf_2[nsylr] = (double) negbr[0];
	    sylinf_3[nsylr] = 0.;
	    sylinf_4[nsylr] = (double) nelem[0];
	    ++nsylr;
	} else {
	    for (j = 0; j < ntt; ++j) {
		int lplac = nsend[j];
		sylinf	[nsylr] = (double) k;
		sylinf_2[nsylr] = (double) negbr[lplac];
		sylinf_3[nsylr] = srank[j];
		sylinf_4[nsylr] = (double) nelem[lplac];
		++nsylr;
	    }
	}
    } /* for (k) */
    *ttsyl /= nn;
} /* dark */
예제 #6
0
파일: pam.c 프로젝트: csilles/cxxr
/* -----------------------------------------------------------
 cstat(): Compute STATistics (numerical output) concerning each partition
*/
void cstat(int *kk, int *nn, int *nsend, int *nrepr, Rboolean all_stats,
	   double *radus, double *damer, double *avsyl, double *separ, double *s,
	   double *dys, int *ncluv, int *nelem, int *med, int *nisol)
{
    int j, k, ja, jk, nplac, ksmal = -1/* -Wall */;
    double ss = *s * 1.1 + 1.;

    /* Parameter adjustments */
    --nisol;
    --med;
    --nelem;
    --ncluv;
    --separ;
    --avsyl;
    --damer;
    --radus;
    --nrepr;
    --nsend;

    /* nsend[j] := i,  where x[i,] is the medoid to which x[j,] belongs */
    for (j = 1; j <= *nn; ++j) {
	if (nrepr[j] == 0) {
	    double dsmal = ss;
	    for (k = 1; k <= *nn; ++k) {
		if (nrepr[k] == 1) {
		    int kj_ = ind_2(k, j);
		    if (dsmal > dys[kj_]) {
			dsmal = dys[kj_];
			ksmal = k;
		    }
		}
	    }
	    nsend[j] = ksmal;
	} else {
	    nsend[j] = j;
	}
    }
    /* ncluv[j] := k , the cluster number  (k = 1..*kk) */
    jk = 1;
    nplac = nsend[1];
    for (j = 1; j <= *nn; ++j) {
	ncluv[j] = 0;
	if (nsend[j] == nplac)
	    ncluv[j] = 1;
    }
    for (ja = 2; ja <= *nn; ++ja) {
	nplac = nsend[ja];
	if (ncluv[nplac] == 0) {
	    ++jk;
	    for (j = 2; j <= *nn; ++j) {
		if (nsend[j] == nplac)
		    ncluv[j] = jk;
	    }
	    if (jk == *kk)
		break;
	}
    }

    if(all_stats) { /*	   analysis of the clustering. */
	int numl;
	for (k = 1; k <= *kk; ++k) {
	    int ntt = 0, m = -1/* -Wall */;
	    double ttt = 0.;
	    radus[k] = -1.;
	    R_CheckUserInterrupt();
	    for (j = 1; j <= *nn; ++j) {
		if (ncluv[j] == k) {
		    double djm;
		    ++ntt;
		    m = nsend[j];
		    nelem[ntt] = j;
		    djm = dys[ind_2(j, m)];
		    ttt += djm;
		    if (radus[k] < djm)
			radus[k] = djm;
		}
	    }
	    if(ntt == 0) REprintf("bug in C cstat(): ntt=0 !!!\n");
	    avsyl[k] = ttt / ntt;
	    med[k] = m;
	}

	if (*kk == 1) {
	    damer[1] = *s;
	    nrepr[1] = *nn;
	    return;
	}
	/*  ELSE	  kk > 1 : */

	/* numl = number of L-clusters. */
	numl = 0;
	for (k = 1; k <= *kk; ++k) {
	    /*
	      identification of cluster k:
	      nelem= vector of object indices,
	      nel  = number of objects
	    */
	    int nel = 0;
	    R_CheckUserInterrupt();

	    for (j = 1; j <= *nn; ++j) {
		if (ncluv[j] == k) {
		    ++nel;
		    nelem[nel] = j;
		}
	    }
	    nrepr[k] = nel;
	    if (nel == 1) {
		int nvn = nelem[1];
		damer[k] = 0.;
		separ[k] = ss;
		for (j = 1; j <= *nn; ++j) {
		    if (j != nvn) {
			int mevj = ind_2(nvn, j);
			if (separ[k] > dys[mevj])
			    separ[k] = dys[mevj];
		    }
		}

		/* Is cluster k
		   1) an L-cluster	 or
		   2) an L*-cluster ? */
		if (separ[k] == 0.)
		    ++numl;

	    }
	    else { /*	       nel != 1 : */
		double dam = -1., sep = ss;
		Rboolean kand = TRUE;
		for (ja = 1; ja <= nel; ++ja) {
		    int jb, nvna = nelem[ja];
		    double aja = -1., ajb = ss;
		    for (jb = 1; jb <= *nn; ++jb) {
			int jndz = ind_2(nvna, jb);
			if (ncluv[jb] == k) {
			    if (aja < dys[jndz])
				aja = dys[jndz];
			} else {
			    if (ajb > dys[jndz])
				ajb = dys[jndz];
			}
		    }
		    if (kand && aja >= ajb)
			kand = FALSE;
		    if (dam < aja)
			dam = aja;
		    if (sep > ajb)
			sep = ajb;
		}
		separ[k] = sep;
		damer[k] = dam;
		if (kand) {
		    ++numl;
		    if (dam >= sep) /*	L-cluster */
			nisol[k] = 1;
		    else/*		L*-cluster */
			nisol[k] = 2;
		    continue /* k */;
		}
	    }
	    /* nel = 1 or (!kand) : */
	    nisol[k] = 0;

	}/* for(k) */

    } /* all_stats */

} /* cstat */
예제 #7
0
파일: twins.c 프로젝트: cran/cluster
/*     AGNES agglomeration */
static void
agnes(int nn, int *kwan, int *ner, double *ban,
      double dys[], int method, double *alpha, int *merge, int trace_lev)
{

/* VARs */
    int n_1 = nn - 1, _d, j, k, la = -1, lb = -1; // -Wall]
    Rboolean has_a3 = FALSE, has_a4 = FALSE,// is alpha[3] or [4] != 0 -- for Lance-Williams
	flex_meth = (method == 6 || method == 7);
    // 6: "flexible": "Flexible Strategy" (K+R p.236 f) extended to 'Lance-Williams'
    // 7: "gaverage" aka Flexible UPGMA (Belbin et al., 1992)

    /* Parameter adjustments */
    --ban;
    --ner;
    --kwan;
    --alpha;

    if(trace_lev) {
	_d = (nn >= 100) ? 3 : (nn >= 10) ? 2 : 1;
	Rprintf("C agnes(n=%*d, method = %d, ..): ", _d,nn, method);
    } else _d = -1;// -Wall

    if(flex_meth) {
	has_a3 = (alpha[3] != 0.);
	has_a4 = (alpha[4] != 0.);
	if(trace_lev) {
	    if(has_a4)
		Rprintf("|par| = 4, alpha[1:4] = (%g,%g,%g,%g); ",
			alpha[1],alpha[2],alpha[3],alpha[4]);
	    else if(has_a3)
		Rprintf("|par| = 3, alpha[1:3] = (%g,%g,%g); ",
			alpha[1],alpha[2],alpha[3]);
	}
    }

//  Starting with nn clusters, kwan[j] := #{obj} in cluster j
    for (j = 1; j <= nn; ++j) {
	kwan[j] = 1;
	ner[j] = j;
    }

// ----------------------------------------------------------------------------
/*     find closest clusters */
    if(trace_lev) Rprintf("%d merging steps\n", n_1);
    for (int nmerge = 0; nmerge < n_1; ++nmerge) {
	// j := min_j { kwan[j] > 0} = first non-empty cluster (j >= 2)
	j = 1; do { j++; } while(kwan[j] == 0);
	if(trace_lev >= 2) Rprintf(" nmerge=%*d, j=%*d, ", _d,nmerge, _d,j);

	double d_min = dys[ind_2(1, j)] * 1.1f + 1.;
	for (k = 1; k <= n_1; ++k) if (kwan[k] > 0) {
		for (j = k + 1; j <= nn; ++j) if (kwan[j] > 0) {
			int k_j = ind_2(k, j);
			if (d_min >= dys[k_j]) { // Note: also when "==" !
			    d_min =  dys[k_j];
			    la = k;
			    lb = j;
			}
		    }
	    }
	// --> closest clusters  {la < lb}  are at distance  d_min
	if(trace_lev >= 2) Rprintf("d_min = D(%*d,%*d) = %#g; ", _d,la, _d,lb, d_min);

/*     merge-structure for plotting tree in S */

	int l1 = -la,
	    l2 = -lb;
	for (j = 0; j < nmerge; ++j) {
	    if (Merge(j, 1) == l1 || Merge(j, 2) == l1)  l1 = j+1;
	    if (Merge(j, 1) == l2 || Merge(j, 2) == l2)  l2 = j+1;
	}
	Merge(nmerge, 1) = l1;
	Merge(nmerge, 2) = l2;
	if(trace_lev >= 3) Rprintf(" -> (%*d,%*d); ", _d,l1, _d,l2);

	if(flex_meth && l1 == l2) {
	    // can happen with non-sensical (alpha_1,alpha_2,beta, ...)
	    error(_("agnes(method=%d, par.method=*) lead to invalid merge; step %d, D(.,.)=%g"),
		  method, nmerge+1, d_min);
	}

/*     determine lfyrs and llast */

	int llast = -1, lfyrs = -1; // -Wall
	for (k = 1; k <= nn; ++k) {
	    if (ner[k] == la) lfyrs = k;
	    if (ner[k] == lb) llast = k;
	}
	ban[llast] = d_min;

	if(trace_lev >= 2) Rprintf("last=%*d;", _d,llast);

/*     if the two clusters are next to each other, ner must not be changed */

	int lnext = lfyrs + kwan[la];
	if (lnext != llast) { /*     updating ner and ban */
	    if(trace_lev >= 2) Rprintf(" upd(n,b);");
	    int lput = lfyrs + kwan[la],
		lenda = llast + kwan[lb] - 2;
	    for (k = 1; k <= llast - lput; ++k) {
		int lka = ner[lput];
		double akb = ban[lput];
		for (j = lput; j <= lenda; ++j) {
		    ner[j] = ner[j + 1];
		    ban[j] = ban[j + 1];
		}
		ner[lenda+1] = lka;
		ban[lenda+1] = akb;
	    }
	}
	if(trace_lev >= 3) Rprintf("\n");

/*     We will merge A & B into  A_{new} */

	// Calculate new dissimilarities d(q, A_{new})
	for (int lq = 1; lq <= nn; ++lq) { //  for each other cluster 'q'

	    if (lq == la || lq == lb || kwan[lq] == 0)
		continue;

	    int naq = ind_2(la, lq);
	    int nbq = ind_2(lb, lq);
	    if(trace_lev >= 3)
		Rprintf(" old D(A, j), D(B, j), j=%*d  = (%g,%g); ",
			_d,lq, dys[naq], dys[nbq]);

	    switch(method) {
	    case 1: { //   1: unweighted pair-]group average method, UPGMA
		double
		    ta = (double) kwan[la],
		    tb = (double) kwan[lb],
		    fa = ta / (ta + tb),
		    fb = tb / (ta + tb);
		dys[naq] = fa * dys[naq] + fb * dys[nbq];
		break;
	    }
	    case 2: /*     2: single linkage */
		dys[naq] = fmin2(dys[naq], dys[nbq]);
		break;
	    case 3: /*     3: complete linkage */
		dys[naq] = fmax2(dys[naq], dys[nbq]);
		break;
            case 4: { //   4: ward's method
		double
		    ta = (double) kwan[la],
		    tb = (double) kwan[lb],
		    tq = (double) kwan[lq],
		    fa = (ta + tq) / (ta + tb + tq),
		    fb = (tb + tq) / (ta + tb + tq),
		    fc = -tq / (ta + tb + tq);
		int nab = ind_2(la, lb);
		dys[naq] = sqrt(fa * dys[naq] * dys[naq] +
				fb * dys[nbq] * dys[nbq] +
				fc * dys[nab] * dys[nab]);
		break;
	    }
	    case 5: /*     5: weighted average linkage */
		dys[naq] = (dys[naq] + dys[nbq]) / 2.;
		break;
	    case 6: { //   6: "Flexible Strategy" (K+R p.236 f) extended to 'Lance-Williams'
		double dNew = alpha[1] * dys[naq] + alpha[2] * dys[nbq];
		if(has_a3) dNew += alpha[3] * dys[ind_2(la, lb)];
		if(has_a4) dNew += alpha[4] * fabs(dys[naq] - dys[nbq]);
		dys[naq] = dNew;
		/* Lance-Williams would allow alpha(1:2) to *depend* on |cluster|
		 * could also include the extensions of Jambu(1978) --
		 * See Gordon A.D. (1999) "Classification" (2nd ed.) p.78 ff */
		break;
	    }
	    case 7: {/*    7: generalized "average" = Flexible UPGMA (Belbin et al., 1992)
		      * Applies the flexible Lance-Williams formula to the UPGMA, aka
		      * "average" case 1 above, i.e., alpha_{1,2} depend on cluster sizes: */
		double
		    ta = (double) kwan[la],
		    tb = (double) kwan[lb],
		    fa = alpha[1] * ta / (ta + tb),
		    fb = alpha[2] * tb / (ta + tb),
		    dNew = fa * dys[naq] + fb * dys[nbq];
		if(has_a3) dNew += alpha[3] * dys[ind_2(la, lb)];
		if(has_a4) dNew += alpha[4] * fabs(dys[naq] - dys[nbq]);
		dys[naq] = dNew;
		break;
	    }
	    default:
		error(_("invalid method (code %d)"), method);
	    }
	    if(trace_lev >= 3)
		Rprintf(" new D(A', %*d) = %g\n", _d,lq, dys[naq]);
	} // for (lq ..)

	kwan[la] += kwan[lb];
	kwan[lb] = 0;
	if(trace_lev >= 2)
	    Rprintf("%s size(A_new)= %d\n", (trace_lev >= 3)? " --> " : "", kwan[la]);

    }// for(nmerge ..)
    return;
} /* agnes */
예제 #8
0
파일: twins.c 프로젝트: cran/cluster
static void
splyt(int nn, int *kwan, int *ner, double *ban,
      double dys[], int stop_at_k, int *merge, int trace_lev)
{
    /* Local variables */
    int j, ja, jb, k, l;
    int jma, jmb, lmm, llq, lmz,
	lxx, lmma, lmmb, lner, nclu;
    int lchan, nhalf, n_1 = nn - 1, splyn;

    /* Parameter adjustments */
    --ban;
    --ner;
    --kwan;


    /*     initialization */
    nclu = 1;
    nhalf = nn * n_1 / 2 + 1;
    for (l = 1; l <= nn; ++l) {
	kwan[l] = 0;
	ban[l] = 0.;
	ner[l] = l;
    }
    kwan[1] = nn;
    ja = 1;

/*     cs :=  diameter of data set */

    double cs = 0.;
    for(k = 0; k < nhalf; ++k) {
	if (cs < dys[k])
	    cs = dys[k];
    }
    if(trace_lev)
	Rprintf("C diana(): ndist= %d, diameter = %g\n", nhalf, cs);

/*     prepare for splitting */

//____________ Big Loop _________________________________________________
L30:
    jb = ja + kwan[ja] - 1;
    jma = jb;

    if (kwan[ja] == 2) { // special case of a pair of objects
	kwan[ja] = 1;
	kwan[jb] = 1;
	ban [jb] = dys[ind_2(ner[ja], ner[jb])];
    }
    else {
	/*     finding first object to be shifted */
	double bygsd = -1.;
	int  lndsd = -1;
	for (l = ja; l <= jb; ++l) {
	    lner = ner[l];
	    double sd = 0.;
	    for (j = ja; j <= jb; ++j)
		sd += dys[ind_2(lner, ner[j])];
	    if (bygsd < sd) {
		bygsd = sd;
		lndsd = l;
	    }
	}

/*     shifting the first object */
	--kwan[ja];
	kwan[jb] = 1;
	if (jb != lndsd) {
	    lchan = ner[lndsd];
	    lmm = jb - 1;
	    for (lmma = lndsd; lmma <= lmm; ++lmma) {
		lmmb = lmma + 1;
		ner[lmma] = ner[lmmb];
	    }
	    ner[jb] = lchan;
	}
	splyn = 0;
	jma = jb - 1;

/*     finding the next object to be shifted */

	do {
	    splyn++;
	    int rest = (jma - ja), jaway = -1;
	    double bdyff = -1.;
	    for (l = ja; l <= jma; ++l) {
		lner = ner[l];
		double da = 0., db = 0.;
		for (j = ja; j <= jma; ++j)
		    da += dys[ind_2(lner, ner[j])];
		da /= rest;
		for (j = jma + 1; j <= jb; ++j)
		    db += dys[ind_2(lner, ner[j])];
		db /= splyn;
		double dyff = da - db;
		if (bdyff < dyff) {
		    bdyff = dyff;
		    jaway = l;
		}
	    } /* end for(l ..) */
	    jmb = jma + 1;

/*     shifting the next object when necessary */

	    if (bdyff <= 0.)
		break; // out of  "object shifting"  while(.) loop

	    if (jma != jaway) {
		lchan = ner[jaway];
		lmz = jma - 1;
		for (lxx = jaway; lxx <= lmz; ++lxx)
		    ner[lxx] = ner[lxx + 1];
		ner[jma] = lchan;
	    }
	    for (lxx = jmb; lxx <= jb; ++lxx) {
		int l_1 = lxx - 1;
		if (ner[l_1] < ner[lxx])
		    break;
		lchan = ner[l_1]; ner[l_1] = ner[lxx]; ner[lxx] = lchan;
	    }

	    --kwan[ja];
	    kwan[jma] = kwan[jmb] + 1;
	    kwan[jmb] = 0;
	    --jma;
	    jmb = jma + 1;

	} while (jma != ja);


// 200:     switch the two parts when necessary

	if (ner[ja] >= ner[jmb]) {
	    int lxxa = ja;
	    for (int lgrb = jmb; lgrb <= jb; ++lgrb) {
		++lxxa;
		lchan = ner[lgrb];
		int lxg = -1;
		for (int ll = lxxa; ll <= lgrb; ++ll) {
		    int lxf = lgrb - ll + lxxa;
		    lxg = lxf - 1;
		    ner[lxf] = ner[lxg];
		}
		ner[lxg] = lchan;
	    }
	    llq = kwan[jmb];
	    kwan[jmb] = 0;
	    jma = ja + jb - jma - 1;
	    jmb = jma + 1;
	    kwan[jmb] = kwan[ja];
	    kwan[ja] = llq;
	}

/* 300 :    compute level for banner */

	if (nclu == 1) {
	    ban[jmb] = cs;
	} else {
	    ban[jmb] = min_dis(dys, ja, jb, &ner[1]);
	}

    }

    if (++nclu < nn) { /* continue splitting until all objects are separated */
	if (jb != nn) {
 L420:
	    ja += kwan[ja];
	    if (ja <= nn) {
		if (kwan[ja] <= 1)
		    goto L420;
		else
		    goto L30;
	    }
	}
	ja = 1;
	if (kwan[ja] == 1)
	    goto L420;
	else
	    goto L30;
    }
//____________ End Big Loop _________________________________________________

/* 500 :  merge-structure for plotting tree in S */

    for (int nmerge = 0; nmerge < n_1; ++nmerge) {
	int nj = -1, l1, l2;
	double dmin = cs;
	for (j = 2; j <= nn; ++j) {
	    if (kwan[j] >= 0 && dmin >= ban[j]) {
		dmin = ban[j];
		nj = j;
	    }
	}
	kwan[nj] = -1;
	l1 = -ner[nj - 1];
	l2 = -ner[nj];
	for (j = 0; j < nmerge; ++j) {
	    if (Merge(j, 1) == l1 || Merge(j, 2) == l1)  l1 = j+1;
	    if (Merge(j, 1) == l2 || Merge(j, 2) == l2)  l2 = j+1;
	}
	Merge(nmerge, 1) = l1;
	Merge(nmerge, 2) = l2;
    }
    return;
} /* splyt */