int PerformKMeans(TRAININGSET *pTS, CODEBOOK *pCB, PARTITIONING *pP, int clus, int repeats, int InitMethod, int quietLevel, int useInitial) { PARTITIONING Pnew, Pinit; CODEBOOK CBnew, CBinit; llong distance[BookSize(pTS)]; llong distanceInit[BookSize(pTS)]; double totalTime, error, currError; int i, better, iter, totalIter; SetClock(&totalTime); totalIter = 0; currError = error = 0; if ((clus < 1) || (BookSize(pTS) < clus) || (repeats < 1)) { return 1; /* clustering failed */ } InitializeSolutions(pTS, pCB, pP, &CBnew, &Pnew, &CBinit, &Pinit, distanceInit, clus, useInitial); PrintHeader(quietLevel); /* perform repeats time full K-means */ for (i = 0; i < repeats; i++) { better = iter = 0; GenerateSolution(pTS, &CBnew, &Pnew, &CBinit, &Pinit, distance, distanceInit, InitMethod, useInitial); KMeansIterate(pTS, &CBnew, &Pnew, distance, quietLevel, i, &iter, &totalTime, &error, useInitial); totalIter += iter; /* got better result */ if ((i == 0) || (error < currError)) { CopyCodebook(&CBnew, pCB); CopyPartitioning(&Pnew, pP); currError = error; better = 1; } PrintRepeat(quietLevel, repeats, i, iter, error, GetClock(totalTime), better); } PrintFooterKM(quietLevel, currError, repeats, GetClock(totalTime), totalIter); FreeCodebook(&CBnew); FreePartitioning(&Pnew); FreeCodebook(&CBinit); FreePartitioning(&Pinit); return 0; } /* PerformKmeans() */
/**
 * Builds a TSData object from a plain-text data file.
 *
 * The file is read with ReadInputData(), its per-dimension min/max values
 * are obtained from FindMinMax(), and the data is converted to a
 * TRAININGSET by WriteData2CB().
 *
 * @param fileName  path of the text file; passed on as Latin-1 bytes
 * @param ts        out-parameter that receives a TSData allocated with new
 * @return false when ReadInputData() reports failure (*ts is not written),
 *         true otherwise
 *
 * NOTE(review): the temporary training set is freed right after the TSData
 * is constructed, so the TSData constructor presumably takes its own copy -
 * confirm against the constructor.
 */
bool TSData::fromTextFile(QString fileName, TSData **ts)
{
    float **vectors;
    int vectorCount;
    int dimension;

    const int readOk = ReadInputData(&vectors, &vectorCount, &dimension,
                                     fileName.toLatin1().data());
    if (!readOk)
    {
        return false;
    }

    float **ranges = FindMinMax(vectors, vectorCount, dimension);
    TRAININGSET converted = WriteData2CB(vectors, vectorCount, dimension, ranges, 1);

    /* TODO: the min/max information is discarded here; that only matters
       if a conversion back to text is ever needed */
    *ts = new TSData(&converted);

    /* the intermediate buffers are no longer needed */
    fvDeleteSet(vectors, vectorCount);
    fvDeleteSet(ranges, dimension);
    FreeCodebook(&converted);

    return true;
}
//modify by QP on 2k8-4-7 //add two parameters //int dis; int cri; for distance function and criteria function seperately TRAININGSET CheckParameters(char *TSName, char *CBName, char *PAName, char *InName, int Minclus,int Maxclus, int ow) { TRAININGSET TS; /* input training set doesn't exist */ if (!ExistFile(TSName)) { ErrorMessage("\nERROR: Input training set doesn't exist: " "%s\n\n", TSName); ExitProcessing(FATAL_ERROR); } /* result codebook file exists and we are told not to overwrite */ if (ExistFile(CBName) && !ow) { ErrorMessage("\nERROR: Result codebook already exists: " "%s\n\n", CBName); ExitProcessing(FATAL_ERROR); } /* result partitioning file exists and we are told not to overwrite */ if (*PAName && ExistFile(PAName) && !ow) { ErrorMessage("\nERROR: Result partitioning already exists: " "%s\n\n", PAName); ExitProcessing(FATAL_ERROR); } /* initial codebook / partitioning doesn't exist */ if (*InName && !DetermineFileName(InName)) { ErrorMessage("\nERROR: Initial codebook/partitioning doesn't exist: %s\n\n", InName); ExitProcessing(FATAL_ERROR); } //add by QP on 2k8-4-7 //verify whether Max is bigger than min; make sure the range is ok if (Maxclus < Minclus ) { ErrorMessage("Bad range: %i < %i.\n", Maxclus, Minclus); ExitProcessing(FATAL_ERROR); } ReadTrainingSet(TSName, &TS); //add by QP on 2k7-11-7 //the size of training set should be at least more than min codebook /* result codebook cannot contain more vectors than training set */ if (BookSize(&TS) < Minclus) { ErrorMessage("\nERROR: Number of vectors in training set "); ErrorMessage("(%d) < Min number of clusters ", BookSize(&TS)); ErrorMessage("(%d%d)!\n\n", Minclus, Maxclus); FreeCodebook(&TS); ExitProcessing(FATAL_ERROR); } return TS; } /* CheckParameters() */
//modify by QP on 2k8-4-7 //change the function ReadInitialCBorPA( ) //add two parameters int Minclus and Maxclus,delete int clus //Minclus -- Min number of clusters //Maxclus -- Max number of clusters int ReadInitialCBorPA(char *InName, int Minclus, int Maxclus, TRAININGSET *pTS, CODEBOOK *pCB, PARTITIONING *pP) { int useInitial = 0; if (*InName) /* we use initial codebook/partitioning */ { switch (DetermineCBFileType(InName)) { case TSFILE: case CBFILE: ReadCodebook(InName, pCB); useInitial = 1; //modify by QP on 2k8-4-7 //If we use initial ones, we just need to judge the size of codebook/ //training set is in the range: [Minclus, Maxclus] if (BookSize(pCB) < Minclus || BookSize(pCB) > Maxclus ) { ErrorMessage("\nERROR: Number of vectors in initial codebook "); ErrorMessage("(%d) <> number of clusters ", BookSize(pCB)); ErrorMessage("(%d)(%d)!\n\n", Minclus, Maxclus); FreeCodebook(pTS); FreeCodebook(pCB); ExitProcessing(FATAL_ERROR); } CreateNewPartitioning(pP, pTS, BookSize(pCB)); break; case PAFILE: ReadPartitioning(InName, pP, pTS); useInitial = 2; //modify by QP on 2k8-4-7 //judge the size of partitioning is in the range: [Minclus, Maxclus] if (PartitionCount(pP)<Minclus || PartitionCount(pP) > Maxclus) { ErrorMessage("\nERROR: Number of partitions in initial partitioning "); ErrorMessage("(%d) <> number of clusters ", PartitionCount(pP)); ErrorMessage("(%d)(%d)!\n\n", Minclus, Maxclus); FreeCodebook(pTS); FreePartitioning(pP); ExitProcessing(FATAL_ERROR); } CreateNewCodebook(pCB, PartitionCount(pP), pTS); break; case NOTFOUND: ErrorMessage("\nERROR: Type of initial codebook/partitioning file " "%s is unidentified!\n\n", InName); FreeCodebook(pTS); ExitProcessing(FATAL_ERROR); break; } } else /* we don't use initial codebook/partitioning */ { // CreateNewCodebook(pCB, clus, pTS); // CreateNewPartitioning(pP, pTS, clus); useInitial = 0; } return useInitial; }
/**
 * Destructor: hands the trainingSet member to FreeCodebook() for release.
 */
TSData::~TSData()
{
    FreeCodebook(&trainingSet);
}