Exemple #1
0
//------------------------------------------------------------------------
//  Print summary of execution
//------------------------------------------------------------------------
void printSummary(
    const KMlocal&		theAlg,		// the algorithm
    const KMdata&		dataPts,	// the points
    KMfilterCenters&		ctrs)		// the centers
{
    cout << "Number of stages: " << theAlg.getTotalStages() << "\n";
    cout << "Average distortion: " <<
	         ctrs.getDist(false)/double(ctrs.getNPts()) << "\n";
    					// print final center points
    cout << "(Final Center Points:\n";
    ctrs.print();
    cout << ")\n";
    					// get/print final cluster assignments
    KMctrIdxArray closeCtr = new KMctrIdx[dataPts.getNPts()];
    double* sqDist = new double[dataPts.getNPts()];
    ctrs.getAssignments(closeCtr, sqDist);

    *kmOut	<< "(Cluster assignments:\n"
		<< "    Point  Center  Squared Dist\n"
		<< "    -----  ------  ------------\n";
    for (int i = 0; i < dataPts.getNPts(); i++) {
	*kmOut	<< "   " << setw(5) << i
		<< "   " << setw(5) << closeCtr[i]
		<< "   " << setw(10) << sqDist[i]
		<< "\n";
    }
    *kmOut << ")\n";
    delete [] closeCtr;
    delete [] sqDist;
}
Exemple #2
0
					// standard constructor: k centers over
					// the data set p, damping factor df
KMfilterCenters::KMfilterCenters(int k, KMdata& p, double df)
    : KMcenters(k, p)
{
    if (!p.getKcTree()) {		// data set has no kc-tree yet
	kmError("Building kc-tree", KMwarn);	// reported at KMwarn level
	p.buildKcTree();		// build it on demand
    }

					// scalar state
    dampFactor	= df;
    currDist	= KM_HUGE;
    currDBIndex	= KM_HUGE;
    currXBIndex	= KM_HUGE;

					// per-center storage (kCtrs entries each)
    sums	= kmAllocPts(kCtrs, getDim());
    sumSqs	= new double[kCtrs];
    weights	= new int[kCtrs];
    dists	= new double[kCtrs];

    invalidate();			// distortions start out invalid
}
// Builds the kc-tree for `datapoints`, runs KMlocalHybrid with k centres and
// returns the first coordinate of every resulting centre in ascending order.
// `runs` is passed as the first argument of the KMterm settings; when `debug`
// is set, each centre is echoed to std::cerr (in centre order, before sorting).
static std::vector<double> computeKMeans(
	KMdata & datapoints,
	uint64_t const k,
	uint64_t const runs,
	bool const debug
	)
{
	KMterm const term(runs, 0, 0, 0, 0.10, 0.10, 3, 0.50, 10, 0.95);
	datapoints.buildKcTree();

	KMfilterCenters ctrs(k, datapoints);
	KMlocalHybrid kmAlg(ctrs, term);
	ctrs = kmAlg.execute();

	// collect coordinate 0 of each centre
	std::vector<double> centrevector;
	centrevector.reserve(k);
	for ( uint64_t idx = 0; idx != k; ++idx )
		centrevector.push_back(ctrs[idx][0]);

	// optional trace of the unsorted centres
	if ( debug )
	{
		for ( uint64_t idx = 0; idx != k; ++idx )
			std::cerr << "centre[" << idx << "]=" << ctrs[idx][0] << std::endl;
	}

	std::sort(centrevector.begin(),centrevector.end());

	return centrevector;
}
Exemple #4
0
//------------------------------------------------------------------------
//  Print summary of execution
//
//  Writes the number of stages, the average distortion and the
//  point-to-center assignment table to the file "output3.txt".
//  If the file cannot be opened, an error is reported on cerr and the
//  process terminates with a failure status.
//------------------------------------------------------------------------
void printSummary(
		  const KMlocal&		theAlg,		// the algorithm
		  const KMdata&		dataPts,	// the points
		  KMfilterCenters&		ctrs)		// the centers
{
  ofstream out;			// output data file stream
  out.open("output3.txt",ios::out);
  if(!out){  // opening the file failed
    cerr << "Impossible d'ouvrir le fichier erreur2" << endl;
    exit(EXIT_FAILURE);  // an error must not be reported as success (was exit(0))
  }

  out << "Number of stages: " << theAlg.getTotalStages() << "\n";
  out << "Average distortion: " <<
    ctrs.getDist(false)/double(ctrs.getNPts()) << "\n";
  // print final center points
  // NOTE(review): print() is given no stream, so the center coordinates
  // presumably do not end up in output3.txt between the two markers below
  // -- confirm against KMfilterCenters::print.
  out << "(Final Center Points:\n";
  ctrs.print();
  out << ")\n";
  // get/print final cluster assignments
  const int nPts = dataPts.getNPts();
  KMctrIdxArray closeCtr = new KMctrIdx[nPts];
  // The squared distances are not printed; the buffer is still passed
  // because getAssignments() is called with both output arrays.
  double* sqDist = new double[nPts];
  ctrs.getAssignments(closeCtr, sqDist);

  out	<< "(Cluster assignments:\n"
	<< "Point  Center :\n"
	<< "-----  ------  \n";
  for (int i = 0; i < nPts; i++) {
    out	<< setw(5) << i
	<< setw(5) << closeCtr[i]
	<< "\n";
  }
  out << ")\n";
  delete [] closeCtr;
  delete [] sqDist;
  out.close();  // close the file
}
Exemple #5
0
/**
 * \fn void exportSTIPs(std::string stip, int dim, const KMdata& dataPts)
 * \brief Exports the STIPs held in \p dataPts to a text file, one point per line.
 *
 * For every point, its first \p dim coordinates are written in order, each one
 * followed by a single space (" "), and the line is then terminated.
 * If the file cannot be opened, an error is printed on std::cerr and the
 * process exits with EXIT_FAILURE.
 *
 * \param[in] stip Name of the file the STIPs are written to (truncated if it exists).
 * \param[in] dim Number of coordinates written for each point.
 * \param[in] dataPts The KMdata object containing the STIPs to export.
 */
void exportSTIPs(std::string stip, int dim, const KMdata& dataPts){
  const int nPts = dataPts.getNPts(); // actual number of points

  // open for writing, discarding any previous content of the file
  ofstream sSTIPs(stip.c_str(), ios::out | ios::trunc);
  if(!sSTIPs){
    std::cerr << "Impossible d'ouvrir le fichier !" << std::endl;
    exit(EXIT_FAILURE);
  }
  for(int i=0; i<nPts ;i++){
    for(int d = 0; d<dim ; d++){
      sSTIPs << dataPts[i][d] << " ";
    }
    // '\n' instead of std::endl: no need to flush the file after every
    // single point; close() below flushes once at the end.
    sSTIPs << '\n';
  }
  sSTIPs.close();
}
Exemple #6
0
    					// standard constructor: remembers the
    					// center count and a pointer to the
    					// data set, then allocates the centers
KMcenters::KMcenters(int k, KMdata& p)
    : kCtrs(k)
    , pts(&p)
{
    					// kCtrs points, each of the data's dimension
    ctrs = kmAllocPts(kCtrs, p.getDim());
}
Exemple #7
0
//------------------------------------------------------------------------
//  Print summary of execution
//
//  - Writes stage count, average distortion, DB-index and XB-index to
//    "kmean.log" and to cout.
//  - Appends to the log, for every center, the number of points assigned
//    to it and its coordinates.
//  - Saves the centers as float records to the feature file
//    "kmain_<infname>" through CFeaFileWriter.
//
//  If "kmean.log" cannot be opened, a message is printed on stderr and
//  the log output is skipped; the console summary and the feature file
//  are still produced.
//------------------------------------------------------------------------
void printSummary(const KMlocal&		theAlg,		// the algorithm
				  const KMdata&		dataPts,	// the points
				  KMfilterCenters&		ctrs)		// the centers
{
	// values reported in both the log file and on the console
	const double dbval = ctrs.getDBIndex();
	const double xbval = ctrs.getXBIndex();
	const int totalStages = theAlg.getTotalStages();
	const double avgDist = ctrs.getDist(false)/double(ctrs.getNPts());

	const int nPts = dataPts.getNPts();
	const int nCtrs = ctrs.getK();
	const int dim = ctrs.getDim();

	// the log is optional: never pass a NULL stream to fprintf
	FILE* logfp = fopen("kmean.log", "wt");
	if (logfp == NULL)
	{
		fprintf(stderr, "printSummary: cannot open kmean.log for writing, log output skipped\n");
	}
	else
	{
		fprintf(logfp, "Number of stages: %d\n", totalStages);
		fprintf(logfp, "Average distortion: %g\n", avgDist);
		fprintf(logfp, "DB-index: %g\n", dbval);
		fprintf(logfp, "XB-index: %g\n", xbval);
	}

	cout << "Number of stages: " << totalStages << "\n";
	cout << "Average distortion: " << avgDist << "\n";
	cout << "DB-index = " << dbval << "\n";
	cout << "XB-index = " << xbval << "\n";

	// closest center (and squared distance) for every data point
	KMctrIdxArray closeCtr = new KMctrIdx[nPts];
	double* sqDist = new double[nPts];
	ctrs.getAssignments(closeCtr, sqDist);

	// hist[c] = number of points assigned to center c (zero-initialised)
	int*  hist = new int[nCtrs]();
	for(int i=0; i < nPts; i++ )
	{
		int k = closeCtr[i];
		hist[k] ++;
	}

	KMpoint kpt;
	if (logfp != NULL)
	{
		for(int i=0; i< nCtrs; i++ )
		{
			fprintf(logfp, "%3d-th, #%5d ", i, hist[i]);

			// print final center points
			kpt = ctrs[i];
			fprintf(logfp, " [");
			for (int j = 0; j < dim; j++)
			{
				fprintf(logfp, "%7g ", kpt[j]);
			}
			fprintf(logfp, " ]\n");
		}
		fclose(logfp);
	}

	// write to fea-file
	CFeaFileWriter theFeaWriter;
	char saveName[256];
	// snprintf bounds the write: a long input name can no longer overflow saveName
	const int nameLen = snprintf(saveName, sizeof(saveName), "kmain_%s", infname.c_str());
	if (nameLen >= static_cast<int>(sizeof(saveName)))
	{
		fprintf(stderr, "printSummary: feature file name truncated to \"%s\"\n", saveName);
	}
	// NOTE(review): the result of openFile() is not checked here, as in the
	// original; confirm how CFeaFileWriter reports a failed open.
	theFeaWriter.openFile(saveName);

	FEA_FILE_HEADER feaHeader;
	feaHeader.nVersion = FEA_FILE_VERSION;
	feaHeader.nRecords = nCtrs;
	feaHeader.nFeaDim = dim;
	feaHeader.nElemType = ELEM_TYPE_FLOAT;
	feaHeader.nElemSize = sizeof(float);
	feaHeader.bIndexTab = 0;
	feaHeader.bVarLen = 0;
	sprintf(feaHeader.szFeaName, "kmean codebook");
	theFeaWriter.setFileHeader(feaHeader);

	// one record per center, coordinates converted to float
	float* pFea = new float[dim];
	for(int i=0; i< nCtrs; i++ )
	{
		// save final center points
		kpt = ctrs[i];
		for (int j = 0; j < dim; j++)
			pFea[j] = kpt[j];

		theFeaWriter.writeRecordAt(pFea, i);
	}
	theFeaWriter.flush2Disk();
	theFeaWriter.closeFile();
	theFeaWriter.releaseMemory();
	delete [] pFea;

	delete [] closeCtr;
	delete [] sqDist;
	delete [] hist;
}
Exemple #8
0
/**
 * \fn void kmIvanAlgorithm(int ic, int dim,  const KMdata& dataPts, int k, KMfilterCenters& ctrs)
 * \brief Coarse-to-fine k-means: Lloyd's iterations are run on growing
 * samples of the data so that the final run on all the data starts from
 * already reasonable centers.
 *
 * \param[in] ic The iteration coefficient; it scales the number of Lloyd stages of every phase.
 * \param[in] dim Dimension of the points and of the centers.
 * \param[in] dataPts The data to cluster.
 * \param[in] k The number of centers.
 * \param[out] ctrs Receives the k final centers (first \p dim coordinates of each).
 *
 * The algorithm runs 3 phases:
 *  - phase 0: a random sample of nPts/4 points, centers initialised with
 *    genRandom(), ic * 4 Lloyd stages;
 *  - phase 1: a random sample of nPts/2 points, centers taken from phase 0,
 *    ic * 2 Lloyd stages;
 *  - phase 2: all the points, centers taken from phase 1, ic * 1 Lloyd stages.
 *
 * Samples are drawn without repetition using a partial Fisher-Yates shuffle
 * of the point indices, which needs exactly one rand() call per sampled point.
 *
 * Progress messages are written to std::cout and std::cerr.
 * Presumably \p ctrs must already hold at least \p k centers of dimension
 * \p dim -- confirm against the caller.
 */
void kmIvanAlgorithm(int ic, int dim,  const KMdata& dataPts, int k, KMfilterCenters& ctrs){
  const int nPts = dataPts.getNPts();
  KMdata subDataPts(dim,nPts); // maxPts = nPts since subDataPts is a sample of dataPts

  // Centers handed over from one phase to the next, stored row by row:
  // coordinate d of center c is centersBuffer[c*dim + d].
  double* centersBuffer = new double[k*dim];

  // Permutation of the point indices used to draw the samples.
  int* indexPool = new int[nPts];

  // ic : iteration coefficient
  const int nrPhases = 3;
  for(int i=0 ; i<nrPhases ; i++){
    std::cerr<<"phase:"<<i<<std::endl;
    const int maxIter = 1 << (nrPhases-1-i); // 4, 2, 1
    const int sampleSize = nPts/maxIter;     // integer division already rounds down

    std::cout << "Applying k-means: " << endl;
    std::cout << "Clustering " << sampleSize << " vectors (ie. " << 100/maxIter << " percent of the data)";
    std::cout << " in " << k << " clusters";
    if (i>0){
      std::cout << " using older centroids..." << std::endl;
    }
    else{
      std::cout << "..." << endl;
      std::cout << "Initializing centroids by sampling..." << std::endl;
    }

    if (i == nrPhases-1){
      // Last phase: copy every point into subDataPts
      for(int s=0; s<nPts ; s++){
        for(int d=0 ; d<dim ; d++){
          subDataPts[s][d] = dataPts[s][d];
        }
      }
      subDataPts.setNPts(nPts);
    }
    else{
      // Draw sampleSize distinct point indices into indexPool[0..sampleSize-1]
      std::cerr<<"ok++"<<std::endl;
      for(int s=0; s<nPts ; s++){
        indexPool[s] = s;
      }
      // NOTE(review): the generator is reseeded from the clock in every phase,
      // as in the original; two phases starting within the same second draw
      // the same random sequence.
      srand(time(NULL));
      for(int s=0; s<sampleSize ;s++){
        // sampleSize <= nPts/2 here, so nPts-s is always positive
        const int pick = s + rand()%(nPts-s);
        const int tmp = indexPool[s];
        indexPool[s] = indexPool[pick];
        indexPool[pick] = tmp;
      }
      std::cerr<<"ok--"<<std::endl;
      // Filling subDataPts with the sampled points
      for(int s=0; s<sampleSize ; s++){
        for(int d=0 ; d<dim ; d++){
          subDataPts[s][d] = dataPts[indexPool[s]][d];
        }
      }
      subDataPts.setNPts(sampleSize);
    }
    subDataPts.buildKcTree();

    // Allocate centers with subData
    KMfilterCenters newCtrs(k, subDataPts);

    // Initializing the centers (randomly for the first phase,
    // from the previous phase otherwise)
    if(i==0){
      newCtrs.genRandom();
    }
    else{
      for(int c = 0; c < k ; c++){
        for(int d=0 ; d<dim ; d++){
          newCtrs[c][d] = centersBuffer[c*dim + d];
        }
      }
    }
    for(int iteration = 0  ; iteration < ic*maxIter ; iteration++){ // ic : iteration coefficient
      newCtrs.lloyd1Stage();
    }

    // Saving the centers of this phase for the next one
    for(int c = 0; c < k ; c++){
      for(int d=0 ; d<dim ; d++){
        centersBuffer[c*dim + d] = newCtrs[c][d];
      }
    }

    // After the last phase, hand the result back to the caller
    if(i==nrPhases-1){
      for(int c = 0; c < k ; c++){
        for(int d=0 ; d<dim ; d++){
          ctrs[c][d] = centersBuffer[c*dim + d];
        }
      }
    }
  }

  delete [] indexPool;
  delete [] centersBuffer;
}