/**
 * Builds a list with one SipperFile entry for each distinct SIPPER file name
 * that the examples in 'examples' refer to.
 *
 * The SIPPER file name of every example is extracted from its image file name
 * via PicesVariables::ParseImageFileName.  The first time a name is seen, its
 * record is loaded through 'dbConn'; when no record comes back a new SipperFile
 * is created with fixed CTD-extension and serial-port instrument settings and
 * is inserted through 'dbConn'.
 *
 * @param dbConn    Database connection used to load and insert SipperFile records.
 * @param examples  Examples whose image file names are scanned.
 * @param log       Not used by this function.
 * @return Newly allocated SipperFileList (constructed with 'true', presumably
 *         meaning it owns its contents - confirm against SipperFileList).
 */
SipperFileListPtr  GetListOfSipperFiles (DataBasePtr           dbConn,
                                         ImageFeaturesListPtr  examples,
                                         RunLog&               log
                                        )
{
  // Only the keys are of interest; the map records which names were already handled.
  map<KKStr,KKStr>   namesAlreadySeen;
  SipperFileListPtr  result = new SipperFileList (true);

  for  (ImageFeaturesList::iterator exampleIt = examples->begin ();  exampleIt != examples->end ();  ++exampleIt)
  {
    ImageFeaturesPtr  example = *exampleIt;

    KKStr     exampleFileName = example->ExampleFileName ();
    KKStr     sipperName;
    kkuint32  parsedScanLine = 0;
    kkuint32  parsedScanCol  = 0;
    PicesVariables::ParseImageFileName (exampleFileName, sipperName, parsedScanLine, parsedScanCol);

    // Each SIPPER file is processed once, on its first appearance.
    if  (namesAlreadySeen.find (sipperName) != namesAlreadySeen.end ())
      continue;

    namesAlreadySeen.insert (pair<KKStr, KKStr>(sipperName, sipperName));

    SipperFilePtr  sipperFile = dbConn->SipperFileRecLoad (sipperName);
    if  (!sipperFile)
    {
      // No record was returned; create one with the fixed settings below and store it.
      sipperFile = new SipperFile (sipperName);

      sipperFile->CtdExt0 ("TRN");
      sipperFile->CtdExt1 ("OXG");
      sipperFile->CtdExt2 ("FLO");
      sipperFile->CtdExt3 ("TUR");

      sipperFile->Sp0 (Instrument::LookUpByShortName ("CTD"));
      sipperFile->Sp1 (Instrument::LookUpByShortName ("P-R"));
      sipperFile->Sp2 (Instrument::LookUpByShortName ("BAY"));

      dbConn->SipperFileInsert (*sipperFile);
    }

    result->PushOnBack (sipperFile);
  }

  return  result;
}  /* GetListOfSipperFiles */
// Example #2
// 0
void	OurNeighbors::LookForNeighbors ()
{
	ImageFeaturesListPtr  currentImageFeatures = NULL;
	KKStr                relativeDir;

	log.Level (10) << "OurNeighbors::LookForNeighbors" << endl;


	/*
	 * create an image feature list from the source directory that corresponds to the
	 * current locations of the actual image files. Where possible, the feature data 
	 * file will be used. However, if an image has been moved it's features will have
	 * to be recalculated (which is handled by the function call) and we'll have to
	 * look in the origImageFeatures list for the original predicted class. We must do 
	 * this since the predicted class for an image file should NEVER change between
	 * classification runs.
	 */
  FeatureFileIOPices::Driver ()->LoadInSubDirectoryTree
                                 (PicesFVProducerFactory::Factory (&log),
                                  sourceRootDirPath,
                                  *mlClasses,
                                  false,           // useDirectoryNameForClassName,
                                  DB (),
                                  cancelFlag,
                                  false,           // rewiteRootFeatureFile
                                  log
                                 );

  currentImageFeatures->FixSipperFileScanLineAndColFields ();
  lastScanLine = LastScanLine (*currentImageFeatures);
  {
    // Make sure Class name matches subdirectory that Example was found in.
    ImageFeaturesList::iterator  idx;
    for  (idx = currentImageFeatures->begin ();  idx != currentImageFeatures->end ();  idx++)
    {
      ImageFeaturesPtr image = *idx;
      MLClassPtr  mlClass = DetermineClassFromFileName (image->ExampleFileName ());
      if  (mlClass)
        image->MLClass (mlClass);
    }
  }

  if  (excludedClasses)  
  {
    if  (excludedClasses->QueueSize () > 0)
      RemoveExcludedClasses (currentImageFeatures);
  }

  //if  (randomizeLocations)
  //  RandomizeLocations (*currentImageFeatures);

  if  (!fromPlanktonName.Empty ())
  {
    fromPlankton = mlClasses->LookUpByName (fromPlanktonName);
    if  (!fromPlankton)
    {
      log.Level (-1) << endl
                     << endl
                     << endl
                     << "LookForNeighbors     ****** ERROR *******" << endl
                     << endl
                     << "No images that are of PlanktonName[" << fromPlanktonName << "]" << endl
                     << endl;
      osWaitForEnter ();
      exit (-1);
    }
  }

  // We will now build Neighbor List
  NeighborList  neighbors (*currentImageFeatures, log);
  neighbors.FindNearestNeighbors (neighborType, fromPlankton);


  double  allClassesMeanNND    = 0.0f;
  double  allClassesMeanStdDev = 0.0f;
  double  allClassesMinDist    = 0.0f;
  double  allClassesMaxDist    = 0.0f;

  neighbors.CalcStatistics (allClassesMeanNND,
                            allClassesMeanStdDev, 
                            allClassesMinDist,
                            allClassesMaxDist
                           );



  if  (fromPlankton)
    neighbors.ReportClassRowRestricted (mlClasses, *report, fromPlankton);
  else
    neighbors.ReportClassRow (mlClasses, *report);

  neighbors.ReportClassNeighbor (mlClasses, *report);

  if  (randomizeLocations)
    RandomReport (*currentImageFeatures);

	log.Level (10) << "OurNeighbors::LookForNeighbors   Exiting"  << endl;
}  /* LookForNeighbors */
void  DeleteDuplicateImages::DeleteImages ()
{
  PrintStandardHeaderInfo (*r);

  *r << "Root Directory :" << rootDir               << "]" << endl;
  *r << "Report File    :" << reportFileName        << "]" << endl;
  *r << endl;

  ImageFeaturesListPtr  images = FeatureFileIOPices::Driver ()->LoadInSubDirectoryTree 
                                     (PicesFVProducerFactory::Factory (&log),
                                      rootDir,
                                      mlClasses,
                                      false,       // false = DONT _useDirectoryNameForClassName
                                      DB (),
                                      cancelFlag,
                                      false,       // false = DONT _rewiteRootFeatureFile
                                      log
                                     );

  KKStr  rootFeatureFileName = osAddSlash (rootDir) + osGetRootName (rootDir) + ".data";
  //images->WriteImageFeaturesToFile (rootFeatureFileName, RawFormat, FeatureNumList::AllFeatures ());

  bool  successful = false;
  uint  numExamplesWritten = 0;
  FeatureFileIOPices::Driver ()->SaveFeatureFile (rootFeatureFileName, images->AllFeatures (), *images, numExamplesWritten, cancelFlag, successful, log);

  *r << "Class Statistics" << endl;
  *r << images->ClassStatisticsStr ();
  *r << endl;

  
  // We can now look for duplicates in list.
  DuplicateImagesPtr  dupLocator = new DuplicateImages (images, log);

  DuplicateImageListPtr  setsOfDupImages = dupLocator->DupExamples ();

  *r << "Number of Duplicate Sets [" << setsOfDupImages->QueueSize () << "]" << endl;
  *r << endl;

  int  dupsDeleted        = 0;
  int  dupsFailedToDelete = 0;

  DuplicateImageList::iterator  dsIDX;
  for  (dsIDX = setsOfDupImages->begin ();  dsIDX != setsOfDupImages->end ();  ++dsIDX)
  {
    DuplicateImagePtr  dupSet = *dsIDX;

    ImageFeaturesListPtr  dups = new ImageFeaturesList (*(dupSet->DuplicatedImages ()), false);

    ImageFeaturesPtr      imageToKeep = (ImageFeaturesPtr)dupSet->ExampleWithSmallestScanLine ();
    ImageFeaturesPtr      firstImage  = NULL;
    bool                  allTheSameClass = dupSet->AllTheSameClass ();
    if  (!allTheSameClass)
    {
      imageToKeep = NULL;
      *r << endl
         << "Images in this set of duplicates are of different classes, will delete all of them." << endl;
    }
    else
    {
      *r << endl
         << "Duplicate Class[" << imageToKeep->MLClassName () << "]  FileName[" << imageToKeep->ExampleFileName () << "]  Keeping." << endl;
    }

    for  (ImageFeaturesList::iterator iIDX = dups->begin ();  iIDX != dups->end ();  iIDX++)
    {
      ImageFeaturesPtr i = *iIDX;

      if  (!firstImage)
        firstImage = i;

      if  (i != imageToKeep)
      {
        KKStr  fullFileName = osAddSlash (rootDir) + i->ExampleFileName ();

        *r << "          Class[" << i->MLClassName () << "]  FileName[" << i->ExampleFileName () << "]  ";

        if  (!duplicateImagesDir.Empty ())
        {
          // We will move image to duplicates directory
          int  dupCount = 0;
          KKStr newFileName;
          do {
            if  (imageToKeep)
            {
              newFileName = osAddSlash (duplicateImagesDir) + 
                            osGetRootName (imageToKeep->ExampleFileName ()) 
                            + "-" +
                            osGetRootName (i->ExampleFileName ());
            }
            else
            {
              newFileName = osAddSlash (duplicateImagesDir) + 
                            osGetRootName (firstImage->ExampleFileName ()) 
                            + "-" +
                            osGetRootName (i->ExampleFileName ());
            }
            if  (dupCount > 0)
              newFileName << "-" << dupCount;
            newFileName << ".bmp";
            dupCount++;
          }  while  (osFileExists (newFileName));

          osCopyFile (fullFileName, newFileName);
          *r << "moved to[" << newFileName << "]  ";
          cout << "Coping [" << fullFileName << "] to Duplicates List" << endl;
        }

        bool  deleted = osDeleteFile (fullFileName);

        if  (deleted)
        {
          *r << "* DELETED *" << endl;
          dupsDeleted++;
          images->DeleteEntry (i);
        }
        else
        {
          *r << "--- Failed To Delete ---" << endl;
          dupsFailedToDelete++;
        }
      }
    }
  }

  *r << endl;
  *r << "Duplicates Successfully Deleted[" << dupsDeleted        << "]" << endl;
  *r << "Duplicates Failed to Delete    [" << dupsFailedToDelete << "]" << endl;
  *r << endl;

  // We will write out the feature file again with the dup images removed.
  //images->WriteImageFeaturesToFile (rootFeatureFileName, RawFormat, FeatureNumList::AllFeatures ());
  numExamplesWritten= 0;
  FeatureFileIOPices::Driver ()->SaveFeatureFile (rootFeatureFileName, images->AllFeatures (), *images, numExamplesWritten, cancelFlag, successful, log);

  delete  dupLocator;  dupLocator = NULL;
  delete  images;      images     = NULL;
}  /* DeleteDuplicateImages */