Ejemplo n.º 1
0
void DendrogramPanel::OnSplitLineChange(int x)
{
    wxSize sz = this->GetClientSize();
    double hh = sz.y;
    double ww = sz.x;
    
    cutoffDistance = maxDistance * (ww - margin - 30 - x) / (double) (ww - margin*2 - 30);
    
    for (int i = nelements-2; i >= 0; i--)
    {
        if (cutoffDistance >=  root[i].distance) {
            nclusters = nelements - i - 1;
            break;
        }
    }

    if (nclusters > max_n_clusters) nclusters = max_n_clusters;
    
    int* clusterid = new int[nelements];
    cuttree (nelements, root, nclusters, clusterid);
    
    for (int i=0; i<nelements; i++) {
        clusters[i] = clusterid[i]+1;
    }
    delete[] clusterid;
    
    // sort result
    std::vector<std::vector<int> > cluster_ids(nclusters);
    
    for (int i=0; i < clusters.size(); i++) {
        cluster_ids[ clusters[i] - 1 ].push_back(i);
    }
    
    std::sort(cluster_ids.begin(), cluster_ids.end(), GenUtils::less_vectors);
    
    for (int i=0; i < nclusters; i++) {
        int c = i + 1;
        for (int j=0; j<cluster_ids[i].size(); j++) {
            int idx = cluster_ids[i][j];
            clusters[idx] = c;
        }
    }
     
    wxWindow* parent = GetParent();
	while (parent) {
		wxWindow* w = parent;
		HClusterDlg* dlg = dynamic_cast<HClusterDlg*>(w);
		if (dlg) {
			dlg->UpdateClusterChoice(nclusters, clusters);
			color_vec.clear();
			CatClassification::PickColorSet(color_vec, nclusters);
			init();
			break;
		}
		parent = w->GetParent();
	}
}
Ejemplo n.º 2
0
int main(int argc, char *argv[]){

	if(!argv[1]) {
		printf("\n./cutTree <Tree Name> <Nr. Cuts> <Mask Name> <Output Name>\n");
		return 0;
	}

	printf("\nLoading the dendrogram ... \n");
	FILE *treeFile = fopen(argv[1], "r");
	int goodValues;
	fscanf(treeFile, "%i", &goodValues);
	Node *tree = malloc((goodValues-1)*sizeof(Node));
    	for(int i=0; i<goodValues-1; i++){
        	fscanf(treeFile, "%i %i %lf", &(tree[i].left), &(tree[i].right), &(tree[i].distance));
   	}
	fclose(treeFile);

	printf("\nCutting the dendrogram ...\n");
        int * clusterid;
        int nr_clusters = atoi(argv[2]);
        clusterid = (int*)malloc(goodValues*sizeof(int));
        cuttree (goodValues, tree, nr_clusters, clusterid);
        free(tree);

	printf("\nLoading mask template ... \n");	
	FSLIO *fslio;
        fslio = FslInit();
        void *buffer = FslReadAllVolumes(fslio, argv[3]);
        double ***mask = FslGetVolumeAsScaledDouble(fslio, 0);
        FslClose(fslio);

	printf("\nSaving the image ...\n");
        nifti_image *nim = nifti_image_read(argv[3], 1);
        int nx = nim->nx; int ny = nim->ny; int nz = nim->nz;
        short *pData = (short *)nim->data;
        int c = 0; int c2 = 0;
        for(int k=0;k<nz;k++) {
        for(int j=0;j<ny;j++) {
        for(int i=0;i<nx;i++) {
                if ( mask[k][j][i] != 0 ) {
                        pData[c2] = clusterid[c]+1;
                        c++; c2++;
                }
                else {
                        pData[c2] = 0;
                        c2++;
                }
        }}}
        nim->data = (short *)pData;
        int ret = nifti_set_filenames(nim, argv[4], 0, 1);
        nifti_image_write( nim );
        nifti_image_free( nim );

	free(clusterid);
        return 0;			
}
Ejemplo n.º 3
0
void DendrogramPanel::UpdateCluster(int _nclusters, std::vector<wxInt64>& _clusters)
{
    if (root == NULL) return;
    
    int* clusterid = new int[nelements];
    cutoffDistance = cuttree (nelements, root, _nclusters, clusterid);
    
    for (int i=0; i<nelements; i++) {
        clusters[i] = clusterid[i]+1;
    }
    delete[] clusterid;
    
    // sort result
    std::vector<std::vector<int> > cluster_ids(_nclusters);
    
    for (int i=0; i < clusters.size(); i++) {
        cluster_ids[ clusters[i] - 1 ].push_back(i);
    }
    
    std::sort(cluster_ids.begin(), cluster_ids.end(), GenUtils::less_vectors);
    
    for (int i=0; i < _nclusters; i++) {
        int c = i + 1;
        for (int j=0; j<cluster_ids[i].size(); j++) {
            int idx = cluster_ids[i][j];
            clusters[idx] = c;
        }
    }
    
    for (int i=0; i<nelements; i++) {
        _clusters[i] = clusters[i];
    }
    
    if (_nclusters < max_n_clusters) {
        nclusters = _nclusters;
        color_vec.clear();
        CatClassification::PickColorSet(color_vec, nclusters);
    
        init();
    }
}
Ejemplo n.º 4
0
/* @api private */
/*
 * Hierarchical clustering entry point exposed to Ruby.
 *
 * Arguments (via rb_scan_args "21"): size, data and an optional options
 * hash. data must be a non-empty array of arrays; size is the number of
 * clusters to cut the tree into and must lie in 1..(number of clustered
 * items), where the clustered items are the rows of data, or its columns
 * when the "transpose" option is non-zero.
 *
 * Options read here: "mask" (array of arrays, default all ones),
 * "transpose" (default 0), "method" (default 'a'), "metric" (default 'e')
 * and :weights (default all 1.0).
 *
 * Returns a hash whose :cluster entry holds one cluster id per clustered
 * item. Raises ArgumentError on invalid arguments and NoMemoryError when
 * treecluster() returns NULL.
 *
 * NOTE(review): a Ruby exception raised by a conversion inside the copy
 * loops would skip the free() calls below.
 */
VALUE rb_do_treecluster(int argc, VALUE *argv, VALUE self) {
    VALUE size, data, mask, weights, options;
    rb_scan_args(argc, argv, "21", &size, &data, &options);

    /* The first row is used to derive the column count, so it must exist. */
    if (TYPE(data) != T_ARRAY || RARRAY_LEN(data) < 1 ||
        TYPE(rb_ary_entry(data, 0)) != T_ARRAY)
        rb_raise(rb_eArgError, "data should be an array of arrays");

    mask = get_value_option(options, "mask", Qnil);

    if (!NIL_P(mask) && TYPE(mask) != T_ARRAY)
        rb_raise(rb_eArgError, "mask should be an array of arrays");

    int transpose = get_int_option(options, "transpose", 0);

    // s: pairwise single-linkage clustering
    // m: pairwise maximum- (or complete-) linkage clustering
    // a: pairwise average-linkage clustering
    // c: pairwise centroid-linkage clustering
    int method    = get_int_option(options, "method", 'a');

    // e = euclidian,
    // b = city-block distance
    // c = correlation
    // a = absolute value of the correlation
    // u = uncentered correlation
    // x = absolute uncentered correlation
    // s = spearman's rank correlation
    // k = kendall's tau
    int dist      = get_int_option(options, "metric", 'e');

    int i,j;
    int nrows = RARRAY_LEN(data);
    int ncols = RARRAY_LEN(rb_ary_entry(data, 0));

    /* Number of items being clustered: rows, or columns when transposed. */
    int dimx  = transpose ? ncols : nrows;

    /* Validate before allocating so that raising cannot leak. */
    if (NIL_P(size))
        rb_raise(rb_eArgError, "size should be > 0 and <= data size");
    int nsets = NUM2INT(rb_Integer(size));
    if (nsets < 1 || nsets > dimx)
        rb_raise(rb_eArgError, "size should be > 0 and <= data size");

    double **cdata    = (double**)malloc(sizeof(double*)*nrows);
    int    **cmask    = (int   **)malloc(sizeof(int   *)*nrows);
    double *cweights  = (double *)malloc(sizeof(double )*ncols);
    int *ccluster;

    for (i = 0; i < nrows; i++) {
        cdata[i]          = (double*)malloc(sizeof(double)*ncols);
        cmask[i]          = (int   *)malloc(sizeof(int   )*ncols);
        for (j = 0; j < ncols; j++) {
            cdata[i][j] = NUM2DBL(rb_Float(rb_ary_entry(rb_ary_entry(data, i), j)));
            cmask[i][j] = NIL_P(mask) ? 1 : NUM2INT(rb_Integer(rb_ary_entry(rb_ary_entry(mask, i), j)));
        }
    }

    weights = NIL_P(options) ? Qnil : rb_hash_aref(options, ID2SYM(rb_intern("weights")));
    for (i = 0; i < ncols; i++) {
        cweights[i] = NIL_P(weights) ? 1.0 : NUM2DBL(rb_Float(rb_ary_entry(weights, i)));
    }

    ccluster = (int *)malloc(sizeof(int)*dimx);

    Node *tree   = treecluster(nrows, ncols, cdata, cmask, cweights, transpose, dist, method, 0);
    VALUE result = Qnil, cluster;

    if (tree) {
        cuttree(dimx, tree, nsets, ccluster);

        result  = rb_hash_new();
        cluster = rb_ary_new();

        for (i = 0; i < dimx; i++)
            rb_ary_push(cluster, INT2NUM(ccluster[i]));

        rb_hash_aset(result, ID2SYM(rb_intern("cluster")),   cluster);
    }

    for (i = 0; i < nrows; i++) {
        free(cdata[i]);
        free(cmask[i]);
    }

    free(cdata);
    free(cmask);
    free(cweights);
    free(ccluster);

    /* Raise only after every C buffer has been released. */
    if (tree)
        free(tree);
    else
        rb_raise(rb_eNoMemError, "treecluster ran out of memory");

    return result;
}
Ejemplo n.º 5
0
/**
 * Builds the dictionary by cutting the hierarchical tree into numClusters
 * clusters and averaging the feature vectors of each cluster.
 *
 * Any existing dictionary is released and replaced by a
 * numClusters x descrSize CV_32FC1 matrix whose row c is the mean of all
 * rows of hClusterData that cuttree() assigned to cluster c.
 *
 * @param numClusters number of clusters; must be in 1..numFeatures.
 * @return false when no tree or data is available or numClusters is out of
 *         range (the existing dictionary is then left untouched), true
 *         otherwise.
 */
bool BagOfFeatures::cutHierarchicalTree(int numClusters)
{
    if(hClusterData == NULL || hTree == NULL)
        return false;

    // Reject counts that would give a negative array size or make the ids
    // written by cuttree() unusable as row indices.
    if(numClusters < 1 || numClusters > numFeatures)
        return false;

    if(dictionary != NULL)
        cvReleaseMat(&dictionary);

    int i, j, index;
    float *ptrCenter;

    int *clusterID = new int [numFeatures];
    // Value-initialised: every member count starts at zero.
    int *indexCount = new int [numClusters]();

    dictionary = cvCreateMat(numClusters, descrSize, CV_32FC1);
    cvSetZero(dictionary);

    // Cluster the features based on the cluster count
    cuttree(numFeatures, hTree, numClusters, clusterID);

    // Count the members of each cluster and sum their descriptors into the
    // matching dictionary row.
    for(i = 0; i < numFeatures; i++)
    {
        index = clusterID[i];
        if(index < 0 || index >= numClusters)
            continue;   // never index outside the dictionary
        indexCount[index]++;

        ptrCenter = (float *)(dictionary->data.ptr + index * dictionary->step);
        for(j = 0; j < descrSize; j++)
        {
            ptrCenter[j] += (float)hClusterData[i][j];
            // NOTE(review): prints every descriptor value to stdout; looks
            // like leftover debug output -- kept to preserve behaviour.
            cout << hClusterData[i][j] << " ";
        }
        cout << endl;
    }

    // Turn the sums into means; an empty cluster keeps its all-zero row
    // instead of being divided by zero.
    for(i = 0; i < numClusters; i++)
    {
        if(indexCount[i] == 0)
            continue;
        ptrCenter = (float *)(dictionary->data.ptr + i * dictionary->step);
        for(j = 0; j < descrSize; j++)
        {
            ptrCenter[j] /= (float)indexCount[i];
        }
    }

    // (A disabled self-check that compared each feature's nearest centre
    // with its assigned cluster used to live here.)

    delete [] clusterID;
    delete [] indexCount;
    return true;
}
Ejemplo n.º 6
0
void example_hierarchical( int nrows, int ncols, 
                           double** data, 
                           char* jobname, 
                           int k, double** distmatrix,
                           int *clusterid)
/* Perform single-linkage hierarchical clustering on a precomputed distance
 * matrix, cut the tree into k clusters and write one "index<TAB>cluster"
 * line per row to the file "<jobname>_C<k>.hie".
 *
 * distmatrix is consumed: its rows 1..nrows-1 and the row array itself are
 * freed here. data, ncols, the mask and the weights are only needed by the
 * disabled (#if 0) section.
 *
 * NOTE(review): the clusterid argument is never written to; the cluster ids
 * live in a local buffer that is released before returning (the previous
 * code overwrote the parameter with a fresh allocation and leaked it).
 */
{ int i, ii, nl, nc;
  const int nnodes = nrows-1;
  double* weight = (double *)malloc(ncols*sizeof(double));
  Node* tree = NULL;
  int** mask = NULL;
  char* filename = NULL;
  int* ids = NULL;
  FILE *out1 = NULL;
  int len;

  (void)clusterid;
  (void)nnodes;

  /* All-ones mask: every value counts as present. */
  mask = (int **)calloc(nrows, sizeof(int*));
  if (mask) {
    for (ii=0;ii<nrows;++ii) {
      mask[ii] = (int *)calloc(ncols, sizeof(int));
    }
    for (nl=0; nl<nrows; ++nl) {
      if (!mask[nl]) continue;
      for (nc=0; nc<ncols; ++nc) {
        mask[nl][nc] = 1;
      }
    }
  }

  if (weight) {
    for (i = 0; i < ncols; i++) weight[i] = 1.0;
  }
  printf("\n");

  /* Let snprintf measure the name so that k == 0 and negative k get enough
   * room (the hand-computed length was one byte short for k == 0). */
  len = snprintf(NULL, 0, "%s_C%d.hie", jobname, k);
  if (len >= 0) filename = (char *)malloc((size_t)len + 1);
  if (filename) {
    snprintf(filename, (size_t)len + 1, "%s_C%d.hie", jobname, k);
    out1 = fopen( filename, "w" );
  }
  if (!out1) printf("could not open output file\n");

  //HERE SHOULD USE method instead of 'xxx' (s,m,a,c)

  printf("================ Pairwise single linkage clustering ============\n");
  /* Since we have the distance matrix here, we may as well use it. */
  tree = treecluster(nrows, ncols, 0, 0, 0, 0, 'e', 's', distmatrix);
  /* The distance matrix was modified by treecluster, so we cannot use it any
   * more. But we still need to deallocate it here.
   * The first row of distmatrix is a single null pointer; no need to free it.
   */
  for (i = 1; i < nrows; i++) free(distmatrix[i]);
  free(distmatrix);
  if (!tree)
  { /* Indication that the treecluster routine failed */
    printf ("treecluster routine failed due to insufficient memory\n");
    goto cleanup;
  }

  #if 0
   /* Andrej: This block looked like it was commented out
    I took out some of the * / because they 
    were generating warning and blocked out the 
    entire section with #if 0 . 
    The compiler will not compile this section */
    
  fprintf(out2,"Node     Item 1   Item 2    Distance\n");
  for(i=0; i<nnodes; i++)
    fprintf(out2,"%3d:%9d%9d      %g\n",
           -i-1, tree[i].left, tree[i].right, tree[i].distance);
	   printf("\n");
	   fclose(out2);
  //free(tree);

  
  printf("================ Pairwise maximum linkage clustering ============\n");
  tree = treecluster(nrows, ncols, data, mask, weight, 0, 'e', 'm', 0);
  /* Here, we let treecluster calculate the distance matrix for us. In that
   * case, the treecluster routine may fail due to insufficient memory to store
   * the distance matrix. For the small data sets in this example, that is
   * unlikely to occur though. Let's check for it anyway:
   */
  if (!tree)
  { /* Indication that the treecluster routine failed */
    printf ("treecluster routine failed due to insufficient memory\n");
    free(weight);
    return;
  }
  printf("Node     Item 1   Item 2    Distance\n");
  for(i=0; i<nnodes; i++)
    printf("%3d:%9d%9d      %g\n",
           -i-1, tree[i].left, tree[i].right, tree[i].distance);
  printf("\n");
  free(tree);



  printf("================ Pairwise average linkage clustering ============\n");
  tree = treecluster(nrows, ncols, data, mask, weight, 0, 'e', 'a', 0); 
  if (!tree)
  { /* Indication that the treecluster routine failed */
    printf ("treecluster routine failed due to insufficient memory\n");
    free(weight);
    return;
  }
  printf("Node     Item 1   Item 2    Distance\n");
  for(i=0; i<nnodes; i++)
    printf("%3d:%9d%9d      %g\n",
           -i-1, tree[i].left, tree[i].right, tree[i].distance);
  printf("\n");
  free(tree);



  printf("================ Pairwise centroid linkage clustering ===========\n");
  tree = treecluster(nrows, ncols, data, mask, weight, 0, 'e', 'c', 0); 
  if (!tree)
  { /* Indication that the treecluster routine failed */
    printf ("treecluster routine failed due to insufficient memory\n");
    free(weight);
    return;
  }
  printf("Node     Item 1   Item 2    Distance\n");
  for(i=0; i<nnodes; i++)
    printf("%3d:%9d%9d      %g\n",
           -i-1, tree[i].left, tree[i].right, tree[i].distance);
  printf("\n");

  #endif

  printf("=============== Cutting a hierarchical clustering tree ==========\n");
  ids = (int *)malloc(nrows*sizeof(int));
  printf(" number of clusters %d \n",k);
  if (ids) {
    cuttree (nrows, tree, k, ids);
    if (out1) {
      for(i=0; i<nrows; i++)
        fprintf(out1, "%09d\t%2d\n", i, ids[i]);
      fprintf(out1, "\n");
    }
  }

cleanup:
  /* Single exit path: every resource acquired above is released here. */
  if (out1) fclose(out1);
  free(ids);
  free(filename);
  if (mask) {
    for (ii=0;ii<nrows;++ii) {
      if (mask[ii]) free(mask[ii]);
    }
    free(mask);
  }
  free(tree);
  free(weight);
  return;
}
/**
 * Runs agglomerative clustering on the document and stores the result.
 *
 * Depending on pParameters->dataType either features are computed or all
 * glyphs are preprocessed. The pairwise distance matrix is then built with
 * initDistanceMatrix(), copied into a temporary matrix and handed to
 * treecluster(); the resulting tree is cut into pParams->nClusters clusters
 * with cuttree() and the labels are turned into a clustering result.
 *
 * @param pParameters       actually an AgglomerativeParameters instance.
 * @param pDocument         document providing the samples.
 * @param pClusteringResult receives the new clustering; any previous
 *                          content is deleted first.
 * @throws NoDataException  when fewer than 2 samples (or, for feature based
 *                          clustering, fewer than 2 features) exist.
 * @throws Exception        on an unknown cluster distance type or when
 *                          treecluster() fails.
 */
void AgglomerativeClustering::startClustering(const ClusterMethodParameters* pParameters, Document *pDocument, ClusteringResult *pClusteringResult)
{
	mpDocument = pDocument;
	mpClusteringResult = pClusteringResult;
	// preprocess glyphs or compute features, depending on distance type:
	if (pParameters->dataType == FEATURES_BASED) {
		pDocument->computeFeatures();
	}
	else {
		pDocument->preprocessAllGlyphs();
	}
//	this->mpDocument->setDistanceType(pParameters->dataType);

	// retrieve parameters:
	const AgglomerativeParameters *pParams = (const AgglomerativeParameters*)(pParameters);

	// print some debug info:
	std::cout << "Starting agglomerative clustering algorithm..." << std::endl;
	std::cout << "nr of clusters: " << pParams->nClusters << std::endl;
	std::cout << "feature dist type: " << pParams->featureDistType << std::endl;
	std::cout << "cluster dist type: " << pParams->clusterDistType << std::endl;

	mpClusteringResult->deleteClustering(); // delete probably old clustering result
//	setClusterDistanceTypeFunctionPointer(pParams->clusterDistType);
	const int nSamples = this->mpDocument->nParsedImages();
	if (nSamples < 2) {
		throw NoDataException("No data found for clustering!");
	}
	if (pParameters->dataType == FEATURES_BASED && this->mpDocument->nFeatures() < 2) {
		throw NoDataException("No features found for clustering!");
	}

	StopWatch watch;
	// initialize distance matrix:
	std::cout << "computing distance matrix..." << std::endl;
	watch.start();
	initDistanceMatrix();
	watch.stop();
	std::cout << "successfully computed distance matrix" << std::endl;

	// TEST: USING CLUSTER LIBRARY:
	// The temporary matrix is one contiguous buffer plus a table of row
	// pointers, both owned by vectors, so nothing leaks when an exception
	// is thrown further down.
	std::vector<double> distStorage(static_cast<size_t>(nSamples) * nSamples);
	std::vector<double*> tmpdistmatrix(nSamples);
	for (int i = 0; i < nSamples; ++i)
		tmpdistmatrix[i] = &distStorage[static_cast<size_t>(i) * nSamples];
	// copy distance matrix (mirrored into both triangles)
	for (int i=0;i<nSamples;++i){
		for (int j=0; j<=i; ++j) {
			tmpdistmatrix[i][j] = mDistMat(i,j);
			tmpdistmatrix[j][i] = mDistMat(i,j);
		}
	}
	std::vector<int> labels(nSamples);
#if 0
	int npass = 100;
	double error;
	int ifound;
	watch.start();
	kmedoids (pParams->nClusters, nSamples, &tmpdistmatrix[0],
			npass, &labels[0], &error, &ifound);
	watch.stop();
	std::cout << "finished kmedioids, error = " << error << ", ifound = " << ifound << std::endl;

#else
	// method = 's' (single-linkage), 'm' (complete-linkage), 'a' (average-linkage) or 'c' (centroid-linkage):
	char methodChar='a';
	switch (pParams->clusterDistType) {
		case AVG_DIST:
			methodChar='a';
			break;
		case MIN_DIST:
			methodChar='s';
			break;
		case MAX_DIST:
			methodChar='m';
			break;
		default:
			throw Exception("Unknown distance type in AgglomerativeClustering::startClustering()");
			break;
	} // end switch

	Node* tree = treecluster(nSamples, 1, NULL, NULL, NULL, 0, '_', methodChar, &tmpdistmatrix[0]);
	if (tree==NULL) {
		// Previously only logged and then passed on to cuttree(); a NULL
		// tree cannot be cut, so stop here.
		std::cerr << "FATAL ERROR - NULL POINTER IN CLUSTER RESULT!" << std::endl;
		throw Exception("treecluster() failed in AgglomerativeClustering::startClustering()");
	}

	// cut hierarchical cluster tree at specified nr of clusters:
	cuttree(nSamples, tree, pParams->nClusters, &labels[0]);
	// The tree comes from the C library's malloc(), so it must be released
	// with free() (needs <cstdlib>), not delete[].
	free(tree);
#endif

	std::cout << "finished agglo clustering with cluster libarary!!" << std::endl;
	pClusteringResult->createClusteringResultFromLabelVector(labels, pDocument);
	/////////////////// END TEST

#if 0
	// get pointer to image chars:
	std::vector<ImageChar*> *imageCharVecPointer = this->mpDocument->getImageCharsVecPointer();

	// create cluster for each instance:
	CharCluster *pCluster=NULL;
	for (int i=0; i<nSamples; ++i) {
		pCluster = mpClusteringResult->addEmptyCluster(this->mpDocument);
		pCluster->addChar( (*imageCharVecPointer)[i] );
	}


	std::cout << "Starting merging process..." << std::endl;
//	watch.start();
	std::vector<float> minValVec;
	std::vector<int> nClustsVec;
	// while nr of clusters not reached -> merge two nearest clusters
//	while (mClusterVec.size() > pParams->nClusters) {
	while (mpClusteringResult->nClusters() > pParams->nClusters) {
		// find min element of distance matrix:
		minValVec.push_back(mMinDist);
//		nClustsVec.push_back(mClusterVec.size());
		nClustsVec.push_back(mpClusteringResult->nClusters());
//		std::cout << "nr of clusters is " << mClusterVec.size() << std::endl;
//		std::cout << "min dist is " << mMinDist << " on index " << mMinIndex << std::endl;
		watch.start();
		updateClusterLabels();
		std::cout << "updated cluster labels, time = " << watch.stop(false) << std::endl;

		watch.start();
		findMinDist();
		std::cout << "found min dist, time = " << watch.stop(false) << std::endl;
		std::cout << "new nr of clusters is " << mpClusteringResult->nClusters() << std::endl;
	} // end while
//	writeTxtFile(nClustsVec, "C:/projekte/impact/matlab_sebastian/c_prog_out/n_clusts_vec.txt");
//	writeTxtFile(minValVec, "C:/projekte/impact/matlab_sebastian/c_prog_out/min_val_vec.txt");
//	watch.stop();
#endif

	pDocument->clearAllPreprocessing();
	pDocument->clearFeatures();
	pClusteringResult->computePrototypeFeatures();
	pClusteringResult->updatePrototypes(true);

	return;
} // end startClustering()
Ejemplo n.º 8
0
/* Print the merge table of a clustering run: one line per node holding the
 * (negative) node number, the two joined items and their distance. */
static void report_tree(int count, int (*pairs)[2], const double* dist)
{ int n;
  printf("Node     Item 1   Item 2    Distance\n");
  for (n = 0; n < count; n++)
    printf("%3d:%9d%9d      %g\n", -n-1, pairs[n][0], pairs[n][1], dist[n]);
  printf("\n");
}

void example_hierarchical(int nrows, int ncols, double** data, int** mask,
			  double** distmatrix)
/* Demonstrate hierarchical clustering on genes: single linkage on the given
 * distance matrix (which is freed here), then maximum, average and centroid
 * linkage on the raw data, and finally a cut of the last tree into three
 * clusters. */
{ /* Banner and method character of the three data-driven passes. */
  static const char* const banners[3] = {
    "================ Pairwise maximum linkage clustering ============\n",
    "================ Pairwise average linkage clustering ============\n",
    "================ Pairwise centroid linkage clustering ===========\n"
  };
  static const char methods[3] = { 'm', 'a', 'c' };
  int row, col, pass;
  const int nnodes = nrows - 1;
  int (*node)[2] = (int (*)[2])malloc(nnodes*sizeof(int[2]));
  int* clusterid = (int*)malloc(nrows*sizeof(int));
  double* distances = (double*)malloc(nnodes*sizeof(double));
  double* weight = (double*)malloc(ncols*sizeof(double));

  /* Equal weight for every column. */
  for (col = 0; col < ncols; col++) weight[col] = 1.0;

  printf("\n");
  printf("================ Pairwise single linkage clustering ============\n");
  /* A distance matrix is already available, so pass it in directly. */
  treecluster(nrows, ncols, 0, 0, 0, 0, 'e', 's', node, distances, distmatrix);
  /* treecluster altered the matrix, so it is of no further use; release it.
   * Row 0 is a single null pointer and needs no free. */
  for (row = 1; row < nrows; row++) free(distmatrix[row]);
  free(distmatrix);
  report_tree(nnodes, node, distances);

  for (pass = 0; pass < 3; pass++)
  { printf("%s", banners[pass]);
    /* Here treecluster computes the distance matrix itself and may fail for
     * lack of memory; unlikely for small inputs, but checked anyway. */
    if (!treecluster(nrows, ncols, data, mask, weight, 0, 'e', methods[pass],
                     node, distances, 0))
    { printf ("treecluster routine failed due to insufficient memory\n");
      goto done;
    }
    report_tree(nnodes, node, distances);
  }

  printf("=============== Cutting a hierarchical clustering tree ==========\n");
  cuttree (nrows, node, 3, clusterid);
  for (row = 0; row < nrows; row++)
    printf("Gene %2d: cluster %2d\n", row, clusterid[row]);
  printf("\n");

done:
  free(clusterid);
  free(node);
  free(distances);
  free(weight);
  return;
}
Ejemplo n.º 9
0
/**
 * Runs hierarchical clustering on input_data and fills clusters with
 * 1-based labels.
 *
 * Pairwise distances are computed (Euclidean when dist is 'e', Manhattan
 * otherwise), the linkage selected by method ('s', 'w', 'm' or 'a') is run
 * through fastcluster, the merges are stored in htree, and the tree is cut
 * into n_cluster clusters; the value returned by cuttree() is kept in
 * cutoffDistance.
 *
 * NOTE: input_data must have been retrieved before this is called.
 *
 * @param clusters cleared, then filled with one label per row.
 * @return always true.
 */
bool HClusterDlg::Run(vector<wxInt64>& clusters)
{
    // weights (auto)
    weight = GetWeights(columns);

    double* pwdist = NULL;
    if (dist == 'e') {
        pwdist = DataUtils::getPairWiseDistance(input_data, weight, rows, columns,
                                                DataUtils::EuclideanDistance);
    } else {
        pwdist = DataUtils::getPairWiseDistance(input_data, weight, rows, columns,
                                                DataUtils::ManhattanDistance);
    }

    fastcluster::auto_array_ptr<t_index> members;

    // Replace any tree left over from a previous run.
    if (htree != NULL) {
        delete[] htree;
        htree = NULL;
    }
    htree = new GdaNode[rows-1];
    fastcluster::cluster_result Z2(rows-1);

    // Dispatch on the linkage method.
    if (method == 's') {
        fastcluster::MST_linkage_core(rows, pwdist, Z2);
    } else if (method == 'w') {
        members.init(rows, 1);
        fastcluster::NN_chain_core<fastcluster::METHOD_METR_WARD, t_index>(rows, pwdist, members, Z2);
    } else if (method == 'm') {
        fastcluster::NN_chain_core<fastcluster::METHOD_METR_COMPLETE, t_index>(rows, pwdist, NULL, Z2);
    } else if (method == 'a') {
        members.init(rows, 1);
        fastcluster::NN_chain_core<fastcluster::METHOD_METR_AVERAGE, t_index>(rows, pwdist, members, Z2);
    }

    delete[] pwdist;

    std::stable_sort(Z2[0], Z2[rows-1]);

    // Translate every merge step into a GdaNode entry of htree.
    fastcluster::union_find nodes(rows);
    t_index node1, node2;
    int step = 0;
    fastcluster::node const * NN = Z2[0];
    while (NN != Z2[rows-1]) {
        // Current cluster identifiers of the two merged points.
        node1 = nodes.Find(NN->node1);
        node2 = nodes.Find(NN->node2);
        // Join them under a new node in the union-find structure.
        nodes.Union(node1, node2);

        // Identifiers below rows are kept; others become rows-id-1.
        node2 = node2 < rows ? node2 : rows-node2-1;
        node1 = node1 < rows ? node1 : rows-node1-1;

        htree[step].left = node1;
        htree[step].right = node2;
        htree[step].distance = Z2[step]->dist;

        ++NN;
        ++step;
    }

    // Cut the tree and publish the labels shifted by one.
    clusters.clear();
    int* clusterid = new int[rows];
    cutoffDistance = cuttree (rows, htree, n_cluster, clusterid);
    for (int r = 0; r < rows; r++) {
        clusters.push_back(clusterid[r]+1);
    }
    delete[] clusterid;
    clusterid = NULL;

    return true;
}