예제 #1
0
/*
 * PerformKMeans -- run K-means `repeats` times and keep the lowest-error run.
 *
 * pTS         training set to cluster
 * pCB, pP     receive the codebook / partitioning of the best run; they are
 *             also handed to InitializeSolutions() together with useInitial
 * clus        number of clusters; must satisfy 1 <= clus <= BookSize(pTS)
 * repeats     number of K-means runs to perform; must be >= 1
 * InitMethod  forwarded to GenerateSolution()
 * quietLevel  forwarded to the Print*() reporting helpers
 * useInitial  forwarded to the initialization and iteration helpers
 *
 * Returns 0 on success, or 1 when the arguments are rejected (none of the
 * working solutions has been initialized at that point).
 */
int PerformKMeans(TRAININGSET *pTS, CODEBOOK *pCB, PARTITIONING *pP,
		  int clus, int repeats, int InitMethod, 
		  int quietLevel, int useInitial)
{
  PARTITIONING  Pnew, Pinit;
  CODEBOOK      CBnew, CBinit;
  double        totalTime, error, currError;
  int           i, better, iter, totalIter;

  SetClock(&totalTime);
  totalIter = 0;
  currError = error = 0;

  if ((clus < 1) || (BookSize(pTS) < clus) || (repeats < 1))
  {
    return 1;   /* clustering failed */
  }

  /* The per-vector distance buffers are declared only after validation:
     the check above guarantees BookSize(pTS) >= clus >= 1, so the
     variable-length arrays can never be created with a zero length
     (which is undefined behaviour).
     NOTE(review): both buffers still live on the stack; for very large
     training sets heap allocation may be the safer choice -- confirm. */
  llong         distance[BookSize(pTS)];
  llong         distanceInit[BookSize(pTS)];

  InitializeSolutions(pTS, pCB, pP, &CBnew, &Pnew, &CBinit, &Pinit, 
      distanceInit, clus, useInitial);

  PrintHeader(quietLevel);

  /* perform a full K-means run `repeats` times */
  for (i = 0; i < repeats; i++)
  {
    better = iter = 0;

    GenerateSolution(pTS, &CBnew, &Pnew, &CBinit, &Pinit, distance, 
		     distanceInit, InitMethod, useInitial);          
    KMeansIterate(pTS, &CBnew, &Pnew, distance, quietLevel, i, &iter, 
        &totalTime, &error, useInitial);

    totalIter += iter;

    /* the first run is always kept; later runs only when their error
       is strictly smaller than the best error seen so far */
    if ((i == 0) || (error < currError)) 
    {
      CopyCodebook(&CBnew, pCB);
      CopyPartitioning(&Pnew, pP);
      currError = error;
      better = 1;
    }

    PrintRepeat(quietLevel, repeats, i, iter, error, GetClock(totalTime), better);
  }

  PrintFooterKM(quietLevel, currError, repeats, GetClock(totalTime), totalIter);

  /* release the working solutions; the best result lives in pCB / pP */
  FreeCodebook(&CBnew);
  FreePartitioning(&Pnew);
  FreeCodebook(&CBinit);
  FreePartitioning(&Pinit);

  return 0;
}  /* PerformKmeans() */
예제 #2
0
/**
 * Reads vectors from a text file and wraps them in a newly allocated TSData.
 *
 * @param fileName  path of the text file handed to ReadInputData(); it is
 *                  converted with the local 8-bit encoding so that names
 *                  containing characters outside Latin-1 are not corrupted
 * @param ts        out-parameter; on success *ts points to a TSData created
 *                  with `new` (the caller owns it). Left untouched on failure.
 * @return true on success; false if ts is null or ReadInputData() reports
 *         failure.
 */
bool TSData::fromTextFile(QString fileName, TSData **ts)
{
    /* Reject a null out-parameter up front, before any data is loaded. */
    if (!ts)
    {
        return false;
    }

    int count = 0;
    int dim = 0;
    float **Data = 0;

    /* The temporary byte array lives until the call has returned. */
    int ok = ReadInputData(&Data, &count, &dim, fileName.toLocal8Bit().data());
    if (!ok)
    {
        return false;
    }

    float **minMax = FindMinMax(Data, count, dim);
    TRAININGSET generated_ts = WriteData2CB(Data, count, dim, minMax, 1);
    /* TODO: we're throwing away the minmax file, that isn't nice
       but shouldn't matter if we don't want to do conversion back to txt */

    /* NOTE(review): generated_ts is freed below, so this relies on the TSData
       constructor taking its own copy of the training set -- confirm. */
    *ts = new TSData(&generated_ts);

    fvDeleteSet(Data, count);
    fvDeleteSet(minMax, dim);

    FreeCodebook(&generated_ts);

    return true;
}
예제 #3
0
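// Modified by QP on 2k8-4-7:
// added two parameters, int dis and int cri, for the distance function and
// the criteria function respectively.
// NOTE(review): the signature below has no dis/cri parameters (it takes
// Minclus/Maxclus), so this note may be stale -- confirm.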
/*
 * CheckParameters -- validate the file-name and cluster-range arguments and
 * load the training set.
 *
 * TSName   input training set file; must exist
 * CBName   result codebook file; must not exist unless ow is non-zero
 * PAName   result partitioning file; checked like CBName, but only when the
 *          string is non-empty
 * InName   initial codebook/partitioning; when non-empty it must be accepted
 *          by DetermineFileName()
 * Minclus  minimum number of clusters
 * Maxclus  maximum number of clusters; must not be smaller than Minclus
 * ow       non-zero allows existing result files to be overwritten
 *
 * Every failed check reports through ErrorMessage() and then calls
 * ExitProcessing(FATAL_ERROR) (presumably that call does not return).
 *
 * Returns the training set read from TSName.
 */
TRAININGSET CheckParameters(char *TSName, char *CBName, char *PAName, 
char *InName, int Minclus,int Maxclus, int ow) {
  TRAININGSET TS;
  
  /* input training set doesn't exist */
  if (!ExistFile(TSName)) 
  {
    ErrorMessage("\nERROR: Input training set doesn't exist: "
        "%s\n\n", TSName);
    ExitProcessing(FATAL_ERROR);
  }
    
  /* result codebook file exists and we are told not to overwrite */
  if (ExistFile(CBName) && !ow) 
  {
    ErrorMessage("\nERROR: Result codebook already exists: "
        "%s\n\n", CBName);
    ExitProcessing(FATAL_ERROR);
  }

  /* result partitioning file exists and we are told not to overwrite */
  if (*PAName && ExistFile(PAName) && !ow) 
  {
    ErrorMessage("\nERROR: Result partitioning already exists: "
        "%s\n\n", PAName);
    ExitProcessing(FATAL_ERROR);
  }
  
  /* initial codebook / partitioning doesn't exist */
  if (*InName && !DetermineFileName(InName))
  {
    ErrorMessage("\nERROR: Initial codebook/partitioning doesn't exist: %s\n\n", InName);
    ExitProcessing(FATAL_ERROR);
  }

  /* the cluster range must not be inverted (check added by QP on 2k8-4-7) */
  if (Maxclus < Minclus)
  {
    ErrorMessage("Bad range: %i < %i.\n", Maxclus, Minclus);
    ExitProcessing(FATAL_ERROR);
  }
  
  ReadTrainingSet(TSName, &TS);

  /* the training set must hold at least Minclus vectors
     (check added by QP on 2k7-11-7) */
  if (BookSize(&TS) < Minclus)
  {
    ErrorMessage("\nERROR: Number of vectors in training set ");
    ErrorMessage("(%d) < Min number of clusters ", BookSize(&TS));
    /* Print only the minimum: the former "(%d%d)" format glued Minclus and
       Maxclus into a single misleading number. */
    ErrorMessage("(%d)!\n\n", Minclus);
    FreeCodebook(&TS);
    ExitProcessing(FATAL_ERROR);
  }
  
  return TS;
}  /* CheckParameters() */
예제 #4
0
// Modified by QP on 2k8-4-7:
// changed the function ReadInitialCBorPA():
// added two parameters, int Minclus and int Maxclus, and deleted int clus.
// Minclus -- minimum number of clusters
// Maxclus -- maximum number of clusters
/*
 * ReadInitialCBorPA -- load an optional initial codebook or partitioning.
 *
 * InName   name of the initial file; an empty string means "none given"
 * Minclus  smallest accepted number of clusters
 * Maxclus  largest accepted number of clusters
 * pTS      training set; freed here before every fatal exit
 * pCB      codebook, either read from InName or created from the partitioning
 * pP       partitioning, either read from InName or created for the codebook
 *
 * Returns 0 when no initial solution is used, 1 when an initial codebook
 * (TSFILE or CBFILE) was read, 2 when an initial partitioning (PAFILE) was
 * read.  Size violations and unidentified file types are reported through
 * ErrorMessage() followed by ExitProcessing(FATAL_ERROR).
 */
int ReadInitialCBorPA(char *InName, int Minclus, int Maxclus, TRAININGSET *pTS, 
CODEBOOK *pCB, PARTITIONING *pP) 
{
  int initKind = 0;

  /* No initial file: neither pCB nor pP is created here. */
  if (!*InName)
  {
    return initKind;
  }

  switch (DetermineCBFileType(InName))
  {
    case TSFILE:
    case CBFILE:
      ReadCodebook(InName, pCB);
      initKind = 1;

      /* the initial codebook size has to lie inside [Minclus, Maxclus] */
      if (!(BookSize(pCB) >= Minclus && BookSize(pCB) <= Maxclus))
      {
        ErrorMessage("\nERROR: Number of vectors in initial codebook ");
        ErrorMessage("(%d) <> number of clusters ", BookSize(pCB));
        ErrorMessage("(%d)(%d)!\n\n", Minclus, Maxclus);
        FreeCodebook(pTS);
        FreeCodebook(pCB);
        ExitProcessing(FATAL_ERROR);
      }

      CreateNewPartitioning(pP, pTS, BookSize(pCB));
      break;

    case PAFILE:
      ReadPartitioning(InName, pP, pTS);
      initKind = 2;

      /* the partition count has to lie inside [Minclus, Maxclus] */
      if (!(PartitionCount(pP) >= Minclus && PartitionCount(pP) <= Maxclus))
      {
        ErrorMessage("\nERROR: Number of partitions in initial partitioning ");
        ErrorMessage("(%d) <> number of clusters ", PartitionCount(pP));
        ErrorMessage("(%d)(%d)!\n\n", Minclus, Maxclus);
        FreeCodebook(pTS);
        FreePartitioning(pP);
        ExitProcessing(FATAL_ERROR);
      }

      CreateNewCodebook(pCB, PartitionCount(pP), pTS);
      break;

    case NOTFOUND:
      ErrorMessage("\nERROR: Type of initial codebook/partitioning file "
          "%s is unidentified!\n\n", InName);
      FreeCodebook(pTS);
      ExitProcessing(FATAL_ERROR);
      break;
  }

  return initKind;
}
예제 #5
0
/** Destructor: hands the embedded training set to FreeCodebook(). */
TSData::~TSData()
{
    FreeCodebook(&trainingSet);
}