//------------------------------------------------------------------------ // Print summary of execution //------------------------------------------------------------------------ void printSummary( const KMlocal& theAlg, // the algorithm const KMdata& dataPts, // the points KMfilterCenters& ctrs) // the centers { cout << "Number of stages: " << theAlg.getTotalStages() << "\n"; cout << "Average distortion: " << ctrs.getDist(false)/double(ctrs.getNPts()) << "\n"; // print final center points cout << "(Final Center Points:\n"; ctrs.print(); cout << ")\n"; // get/print final cluster assignments KMctrIdxArray closeCtr = new KMctrIdx[dataPts.getNPts()]; double* sqDist = new double[dataPts.getNPts()]; ctrs.getAssignments(closeCtr, sqDist); *kmOut << "(Cluster assignments:\n" << " Point Center Squared Dist\n" << " ----- ------ ------------\n"; for (int i = 0; i < dataPts.getNPts(); i++) { *kmOut << " " << setw(5) << i << " " << setw(5) << closeCtr[i] << " " << setw(10) << sqDist[i] << "\n"; } *kmOut << ")\n"; delete [] closeCtr; delete [] sqDist; }
// standard constructor KMfilterCenters::KMfilterCenters(int k, KMdata& p, double df) : KMcenters(k, p) { if (p.getKcTree() == NULL) { // kc-tree not yet built? kmError("Building kc-tree", KMwarn); p.buildKcTree(); // build it now } sums = kmAllocPts(kCtrs, getDim()); sumSqs = new double[kCtrs]; weights = new int[kCtrs]; dists = new double[kCtrs]; currDist = KM_HUGE; dampFactor = df; currDBIndex = KM_HUGE; currXBIndex = KM_HUGE; invalidate(); // distortions are initially invalid }
/**
 * Cluster datapoints with KMlocalHybrid and return the first coordinate of
 * every resulting centre, sorted in ascending order.
 *
 * The kc-tree of datapoints is (re)built before the centres are created.
 * The termination object is constructed from runs plus a fixed set of
 * constants.
 *
 * @param datapoints  data set to cluster (its kc-tree is built here)
 * @param k           number of centres
 * @param runs        first argument handed to the KMterm constructor
 * @param debug       when true, each centre's first coordinate is written to std::cerr
 * @return            k values (coordinate 0 of each centre), sorted ascending
 */
static std::vector<double> computeKMeans(
    KMdata & datapoints,
    uint64_t const k,
    uint64_t const runs,
    bool const debug
)
{
    KMterm const term(runs, 0, 0, 0, 0.10, 0.10, 3, 0.50, 10, 0.95);

    datapoints.buildKcTree();

    KMfilterCenters ctrs(k, datapoints);
    KMlocalHybrid kmAlg(ctrs, term);
    ctrs = kmAlg.execute();

    std::vector<double> centrevector;
    // the final size is known up front, so allocate once instead of growing
    centrevector.reserve(k);

    for ( uint64_t i = 0; i < k; ++i )
    {
        centrevector.push_back(ctrs[i][0]);

        if ( debug )
            std::cerr << "centre[" << i << "]=" << ctrs[i][0] << std::endl;
    }

    std::sort(centrevector.begin(), centrevector.end());

    return centrevector;
}
//------------------------------------------------------------------------ // Print summary of execution //------------------------------------------------------------------------ void printSummary( const KMlocal& theAlg, // the algorithm const KMdata& dataPts, // the points KMfilterCenters& ctrs) // the centers { ofstream out; // output data file stream out.open("output3.txt",ios::out); if(!out){ // si l'ouverture a échoué cerr << "Impossible d'ouvrir le fichier erreur2" << endl; exit(0); } out << "Number of stages: " << theAlg.getTotalStages() << "\n"; out << "Average distortion: " << ctrs.getDist(false)/double(ctrs.getNPts()) << "\n"; // print final center points out << "(Final Center Points:\n"; ctrs.print(); out << ")\n"; // get/print final cluster assignments KMctrIdxArray closeCtr = new KMctrIdx[dataPts.getNPts()]; double* sqDist = new double[dataPts.getNPts()]; ctrs.getAssignments(closeCtr, sqDist); // obligé d'avoir la distance ? out << "(Cluster assignments:\n" << "Point Center :\n" << "----- ------ \n"; for (int i = 0; i < dataPts.getNPts(); i++) { out << setw(5) << i << setw(5) << closeCtr[i] << "\n"; } out << ")\n"; delete [] closeCtr; delete [] sqDist; out.close(); // on ferme le fichier }
/** * \fn void exportSTIPs(std::string stip, int dim, const KMdata& dataPts) * \brief STIPs exportation function in the format 1 point = 1 line. * Each dimension are separated from one space (" "). * * \param[in] stip Name of the file containing the STIPs. * \param[in] dim The STIPs dimension. * \param[in] dataPts The KMlocal object which will be containing STIPs. */ void exportSTIPs(std::string stip, int dim, const KMdata& dataPts){ int nPts = dataPts.getNPts(); // actual number of points // open fiouverture en écriture avec effacement du fichier ouvert ofstream sSTIPs(stip.c_str(), ios::out | ios::trunc); if(!sSTIPs){ std::cerr << "Impossible d'ouvrir le fichier !" << std::endl; exit(EXIT_FAILURE); } for(int i=0; i<nPts ;i++){ for(int d = 0; d<dim ; d++){ sSTIPs << dataPts[i][d] << " "; } sSTIPs << std::endl; } sSTIPs.close(); }
// standard constructor KMcenters::KMcenters(int k, KMdata& p) : kCtrs(k), pts(&p) { ctrs = kmAllocPts(kCtrs, p.getDim()); }
//------------------------------------------------------------------------ // Print summary of execution //------------------------------------------------------------------------ void printSummary(const KMlocal& theAlg, // the algorithm const KMdata& dataPts, // the points KMfilterCenters& ctrs) // the centers { double dbval, xbval; dbval = ctrs.getDBIndex(); xbval = ctrs.getXBIndex(); FILE* logfp = fopen("kmean.log", "wt"); fprintf(logfp, "Number of stages: %d\n", theAlg.getTotalStages()); fprintf(logfp, "Average distortion: %g\n", ctrs.getDist(false)/double(ctrs.getNPts())); fprintf(logfp, "DB-index: %g\n", dbval); fprintf(logfp, "XB-index: %g\n", xbval); cout << "Number of stages: " << theAlg.getTotalStages() << "\n"; cout << "Average distortion: " << ctrs.getDist(false)/double(ctrs.getNPts()) << "\n"; cout << "DB-index = " << dbval << "\n"; cout << "XB-index = " << xbval << "\n"; KMctrIdxArray closeCtr = new KMctrIdx[dataPts.getNPts()]; double* sqDist = new double[dataPts.getNPts()]; ctrs.getAssignments(closeCtr, sqDist); int* hist = new int[ctrs.getK()]; memset(hist, 0, sizeof(int) * ctrs.getK() ); for(int i=0; i < dataPts.getNPts(); i++ ) { int k = closeCtr[i]; hist[k] ++; } KMpoint kpt; for(int i=0; i< ctrs.getK(); i++ ) { fprintf(logfp, "%3d-th, #%5d ", i, hist[i]); // print final center points kpt = ctrs[i]; fprintf(logfp, " ["); for (int j = 0; j < ctrs.getDim(); j++) { fprintf(logfp, "%7g ", kpt[j]); } fprintf(logfp, " ]\n"); } fclose(logfp); // write to fea-file CFeaFileWriter theFeaWriter; char saveName[256]; sprintf(saveName, "kmain_%s", infname.c_str() ); theFeaWriter.openFile(saveName); FEA_FILE_HEADER feaHeader; feaHeader.nVersion = FEA_FILE_VERSION; feaHeader.nRecords = ctrs.getK(); feaHeader.nFeaDim = ctrs.getDim(); feaHeader.nElemType = ELEM_TYPE_FLOAT; feaHeader.nElemSize = sizeof(float); feaHeader.bIndexTab = 0; feaHeader.bVarLen = 0; sprintf(feaHeader.szFeaName, "kmean codebook"); theFeaWriter.setFileHeader(feaHeader); float* pFea = new 
float[ctrs.getDim()]; for(int i=0; i< ctrs.getK(); i++ ) { // save final center points kpt = ctrs[i]; for (int j = 0; j < ctrs.getDim(); j++) pFea[j] = kpt[j]; theFeaWriter.writeRecordAt(pFea, i); } theFeaWriter.flush2Disk(); theFeaWriter.closeFile(); theFeaWriter.releaseMemory(); delete [] pFea; delete [] closeCtr; delete [] sqDist; delete [] hist; }
/** * \fn void kmIvanAlgorithm(int ic, int dim, const KMdata& dataPts, int k, KMfilterCenters& ctrs) * \brief This is an optimized KMeans algorithm. Ivan's algorithm uses * basic KMeans algorithm (here the Lloyd's one) and the idea was to * initialize centers intelligently. * * \param[in] ic The iteration coefficient will determine the number of iterations in each phases. * \param[in] dim Points and centers's dimension. * \param[in] dataPts The data we want to compute the centers. * \param[in] k The number of centers. * \param[out] ctrs The centers. * * The Ivan's algorithm is divided into 3 phases. The first phase is executed on * 25 per cent of the data (randomly sampled). To begin, the centers are randomly generated. * Then ic * 4 iterations of a KMeans algorithm are executed. * During the second part we cluster 50 per cent of the data using the older centroids. * This step is computed ic * 2 times. * Finally, we make ic * 1 iteration on all the data. * */ void kmIvanAlgorithm(int ic, int dim, const KMdata& dataPts, int k, KMfilterCenters& ctrs){ int nPts = dataPts.getNPts(); KMdata subDataPts(dim,nPts); // maxPts = nPts since subDataPts is a sample of dataPts int* randomVector = NULL; double** centersBuffer = NULL; centersBuffer = (double **) malloc(k*sizeof(double*)); for(int c=0 ; c<k ; c++){ centersBuffer[c] = (double*) malloc(dim*sizeof(double)); } // ic : iteration coefficient int nrPhases = 3; for(int i=0 ; i<nrPhases ; i++){ std::cerr<<"phase:"<<i<<std::endl; int maxIter = (int) pow(2,(nrPhases-1-i)); int sampleSize = floor(nPts/maxIter); std::cout << "Applying k-means: " << endl; std::cout << "Clustering " << sampleSize << " vectors (ie. " << 100/maxIter << " percent of the data)"; std::cout << " in " << k << " clusters"; if (i>0){ std::cout << " using older centroids..." << std::endl; } else{ std::cout << "..." << endl; std::cout << "Initializing centroids by sampling..." 
<< std::endl; } if (i == nrPhases-1){ // Filling subDataPts for(int s=0; s<nPts ; s++){ for(int d=0 ; d<dim ; d++){ subDataPts[s][d] = dataPts[s][d]; } } subDataPts.setNPts(nPts); } else{ // Filling the random vector permiting to sampling "uniformly" (as more as we can) the data std::cerr<<"ok++"<<std::endl; randomVector = (int*) malloc(sampleSize * sizeof(int)); srand(time(NULL)); // initialisation of rand for(int s=0; s<sampleSize ;s++){ int r = (int) rand()%(nPts); int index = 0; while(index<s && randomVector[index] != r){ index++; } if(s==0 || randomVector[index] != r) randomVector[s] = r; else{ s--; } } std::cerr<<"ok--"<<std::endl; // Filling subDataPts for(int s=0; s<sampleSize ; s++){ for(int d=0 ; d<dim ; d++){ subDataPts[s][d] = dataPts[randomVector[s]][d]; } } subDataPts.setNPts(sampleSize); } subDataPts.buildKcTree(); // Allocate centers with subData KMfilterCenters newCtrs(k, subDataPts); // Initializing the centers (randomly for the first iteration) if(i==0){ (newCtrs).genRandom(); } else{ for(int c = 0; c < k ; c++){ for(int d=0 ; d<dim ; d++){ (newCtrs)[c][d] = centersBuffer[c][d]; } } } for(int iteration = 0 ; iteration < ic*maxIter ; iteration++){ // ic : iteration coefficient (newCtrs).lloyd1Stage(); } // Saving the old centers in centersBuffer for(int c = 0; c < k ; c++){ for(int d=0 ; d<dim ; d++){ centersBuffer[c][d] = (newCtrs)[c][d]; } } if(i==nrPhases-1){ for(int c = 0; c < k ; c++){ for(int d=0 ; d<dim ; d++){ ctrs[c][d] = centersBuffer[c][d]; } } } free(randomVector); randomVector = NULL; } for(int c=0 ; c<k ; c++){ free(centersBuffer[c]); } free(centersBuffer); }