Пример #1
0
/**
* Reads labelled feature samples from every training page named on the
* command line, clusters the samples of each character and hands the
* resulting protos to WriteNormProtos().
*
* Each training page is a text file in the following format (the parsing
* itself is done by ReadTrainingSamples(), which is not part of this
* function):
* @verbatim
   FontName CharName NumberOfFeatureTypes(N)
      FeatureTypeName1 NumberOfFeatures(M)
         Feature1
         ...
         FeatureM
      FeatureTypeName2 NumberOfFeatures(M)
         Feature1
         ...
         FeatureM
      ...
      FeatureTypeNameN NumberOfFeatures(M)
         Feature1
         ...
         FeatureM
   FontName CharName ...
@endverbatim
* Only features of type PROGRAM_FEATURE_TYPE are requested from the reader.
*
* For every character the samples are clustered with the global Config.
* When a clustering pass yields no significant proto, Config.MinSamples is
* reduced by 5% and the pass is repeated until a proto is found or
* MinSamples drops to 0.001; MinSamples is then restored for the next
* character.
*
* NOTE(review): an earlier version of this header described per-character
* output files (DirectoryName/FontName/CharName.FeatureTypeName).  Nothing in
* this function writes such files; all output goes through
* WriteNormProtos(), which receives FLAGS_D as its first argument.
*
* @param argc  number of command line arguments
* @param argv  array of command line arguments
* @return 0 on success, 1 if SetUpForClustering() returns NULL
* @note Globals: Config (overwritten with CNConfig, MinSamples and
*       MagicSamples are modified while clustering), FLAGS_D (read)
* @note History: Fri Aug 18 08:56:17 1989, DSJ, Created.
*/
int main(int argc, char *argv[]) {
  // Set the global Config parameters before parsing the command line.
  Config = CNConfig;

  const char  *PageName;
  FILE  *TrainingPage;
  LIST  CharList = NIL_LIST;
  CLUSTERER  *Clusterer = NULL;
  LIST    NormProtoList = NIL_LIST;
  LIST pCharList;
  LABELEDLIST CharSample;
  FEATURE_DEFS_STRUCT FeatureDefs;
  InitFeatureDefs(&FeatureDefs);

  ParseArguments(&argc, &argv);
  int num_fonts = 0;
  while ((PageName = GetNextFilename(argc, argv)) != NULL) {
    printf("Reading %s ...\n", PageName);
    // NOTE(review): the result of Efopen is used unchecked; presumably it
    // reports open failures itself -- confirm.
    TrainingPage = Efopen(PageName, "rb");
    ReadTrainingSamples(FeatureDefs, PROGRAM_FEATURE_TYPE,
                        100, NULL, TrainingPage, &CharList);
    fclose(TrainingPage);
    ++num_fonts;
  }
  printf("Clustering ...\n");
  // To allow an individual font to form a separate cluster,
  // reduce the min samples:
  // Config.MinSamples = 0.5 / num_fonts;
  pCharList = CharList;
  // The norm protos will count the source protos, so we keep them here in
  // freeable_protos, so they can be freed later.
  GenericVector<LIST> freeable_protos;
  iterate(pCharList) {
    //Cluster
    CharSample = (LABELEDLIST)first_node(pCharList);
    Clusterer =
      SetUpForClustering(FeatureDefs, CharSample, PROGRAM_FEATURE_TYPE);
    if (Clusterer == NULL) {  // To avoid a SIGSEGV
      fprintf(stderr, "Error: NULL clusterer!\n");
      return 1;
    }
    // The proto list is scoped to this character and starts out empty.  If
    // the retry loop below never runs (MinSamples already <= 0.001 after
    // command-line parsing), this keeps the previous character's list from
    // being registered under this label and from being pushed into
    // freeable_protos -- and therefore freed -- a second time.
    LIST ProtoList = NIL_LIST;
    float SavedMinSamples = Config.MinSamples;
    // To disable the tendency to produce a single cluster for all fonts,
    // make MagicSamples an impossible to achieve number:
    // Config.MagicSamples = CharSample->SampleCount * 10;
    Config.MagicSamples = CharSample->SampleCount;
    while (Config.MinSamples > 0.001) {
      ProtoList = ClusterSamples(Clusterer, &Config);
      if (NumberOfProtos(ProtoList, 1, 0) > 0) {
        break;
      } else {
        // Nothing significant at this threshold: relax it by 5% and retry.
        Config.MinSamples *= 0.95;
        printf("0 significant protos for %s."
               " Retrying clustering with MinSamples = %f%%\n",
               CharSample->Label, Config.MinSamples);
      }
    }
    // Undo the relaxation so the next character starts from the same value.
    Config.MinSamples = SavedMinSamples;
    AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label);
    freeable_protos.push_back(ProtoList);
    FreeClusterer(Clusterer);
  }
  FreeTrainingSamples(CharList);
  int desc_index = ShortNameToFeatureType(FeatureDefs, PROGRAM_FEATURE_TYPE);
  WriteNormProtos(FLAGS_D.c_str(), NormProtoList,
                  FeatureDefs.FeatureDesc[desc_index]);
  FreeNormProtoList(NormProtoList);
  for (int i = 0; i < freeable_protos.size(); ++i) {
    FreeProtoList(&freeable_protos[i]);
  }
  printf ("\n");
  return 0;
}  // main
Пример #2
0
/*---------------------------------------------------------------------------*/
/*
**  Parameters:
**    argc  number of command line arguments
**    argv  array of command line arguments
**  Globals:
**    Config     MinSamples and MagicSamples are modified while clustering
**               (MinSamples is restored after each character)
**    Directory  passed to WriteNormProtos as its first argument
**  Operation:
**    This program reads in text files consisting of feature
**    samples from training pages in the following format (the
**    parsing itself is done by ReadTrainingSamples):
**
**      FontName CharName NumberOfFeatureTypes(N)
**         FeatureTypeName1 NumberOfFeatures(M)
**            Feature1
**            ...
**            FeatureM
**         FeatureTypeName2 NumberOfFeatures(M)
**            Feature1
**            ...
**            FeatureM
**         ...
**         FeatureTypeNameN NumberOfFeatures(M)
**            Feature1
**            ...
**            FeatureM
**      FontName CharName ...
**
**    The samples of each character are then clustered.  When a
**    clustering pass yields no significant proto, Config.MinSamples
**    is reduced by 5% and the pass is repeated until a proto is
**    found or MinSamples drops to 0.001.  The protos of all
**    characters are collected and handed to WriteNormProtos.
**
**    NOTE(review): an earlier version of this header described
**    per-character output files
**    (DirectoryName/FontName/CharName.FeatureTypeName).  Nothing in
**    this function writes such files.
**  Return: 0 on success, 1 if no clusterer is available (either
**    SetUpForClustering returned NULL or no samples were read)
**  Exceptions: none
**  History: Fri Aug 18 08:56:17 1989, DSJ, Created.
*/
int main (
     int  argc,
     char  **argv)
{
  char  *PageName;
  FILE  *TrainingPage;
  LIST  CharList = NIL_LIST;
  CLUSTERER  *Clusterer = NULL;
  LIST    ProtoList = NIL_LIST;
  LIST    NormProtoList = NIL_LIST;
  LIST pCharList;
  LABELEDLIST CharSample;
  FEATURE_DEFS_STRUCT FeatureDefs;
  InitFeatureDefs(&FeatureDefs);

  ParseArguments(argc, argv);
  int num_fonts = 0;
  while ((PageName = GetNextFilename(argc, argv)) != NULL) {
    printf("Reading %s ...\n", PageName);
    // NOTE(review): the result of Efopen is used unchecked; presumably it
    // reports open failures itself -- confirm.
    TrainingPage = Efopen(PageName, "r");
    ReadTrainingSamples(FeatureDefs, PROGRAM_FEATURE_TYPE,
                        100, 1.0f / 64.0f, 0.0f, NULL, TrainingPage, &CharList);
    fclose(TrainingPage);
    ++num_fonts;
  }
  printf("Clustering ...\n");
  // To allow an individual font to form a separate cluster,
  // reduce the min samples:
  // Config.MinSamples = 0.5 / num_fonts;
  pCharList = CharList;
  // NOTE(review): every iteration overwrites Clusterer and ProtoList, and
  // only the values from the last iteration are released after the loop.
  // Whether the earlier ones can be freed here depends on whether the protos
  // stored in NormProtoList still reference them -- confirm before changing.
  iterate(pCharList) {
    //Cluster
    CharSample = (LABELEDLIST)first_node(pCharList);
    Clusterer =
      SetUpForClustering(FeatureDefs, CharSample, PROGRAM_FEATURE_TYPE);
    // The clusterer is handed to ClusterSamples right below, so it has to be
    // validated here; the check after the loop would come too late.
    if (Clusterer == NULL) {  // To avoid a SIGSEGV
      fprintf(stderr, "Error: NULL clusterer!\n");
      return 1;
    }
    float SavedMinSamples = Config.MinSamples;
    // To disable the tendency to produce a single cluster for all fonts,
    // make MagicSamples an impossible to achieve number:
    // Config.MagicSamples = CharSample->SampleCount * 10;
    Config.MagicSamples = CharSample->SampleCount;
    while (Config.MinSamples > 0.001) {
      ProtoList = ClusterSamples(Clusterer, &Config);
      if (NumberOfProtos(ProtoList, 1, 0) > 0) {
        break;
      } else {
        // Nothing significant at this threshold: relax it by 5% and retry.
        Config.MinSamples *= 0.95;
        printf("0 significant protos for %s."
               " Retrying clustering with MinSamples = %f%%\n",
               CharSample->Label, Config.MinSamples);
      }
    }
    // Undo the relaxation so the next character starts from the same value.
    Config.MinSamples = SavedMinSamples;
    AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label);
  }
  FreeTrainingSamples(CharList);
  // Clusterer is still NULL here only when the loop never ran, i.e. no
  // samples were read; WriteNormProtos must not be given a NULL clusterer.
  if (Clusterer == NULL) {  // To avoid a SIGSEGV
    fprintf(stderr, "Error: no training samples to cluster!\n");
    return 1;
  }
  WriteNormProtos (Directory, NormProtoList, Clusterer);
  FreeClusterer(Clusterer);
  FreeProtoList(&ProtoList);
  FreeNormProtoList(NormProtoList);
  printf ("\n");
  return 0;
}  // main
Пример #3
0
/*---------------------------------------------------------------------------*/
/*
**	Parameters:
**		argc	number of command line arguments
**		argv	array of command line arguments
**	Globals:
**		Config		MinSamples and MagicSamples are modified while
**				clustering (MinSamples is restored after each
**				character)
**		Directory	passed to WriteNormProtos as its first argument
**	Operation:
**		This program reads in text files consisting of feature
**		samples from training pages in the following format (the
**		parsing itself is done by ReadTrainingSamples):
**
**			FontName CharName NumberOfFeatureTypes(N)
**			   FeatureTypeName1 NumberOfFeatures(M)
**			      Feature1
**			      ...
**			      FeatureM
**			   FeatureTypeName2 NumberOfFeatures(M)
**			      Feature1
**			      ...
**			      FeatureM
**			   ...
**			   FeatureTypeNameN NumberOfFeatures(M)
**			      Feature1
**			      ...
**			      FeatureM
**			FontName CharName ...
**
**		The samples of each character are then clustered.  When a
**		clustering pass yields no significant proto,
**		Config.MinSamples is reduced by 5% and the pass is repeated
**		until a proto is found or MinSamples drops to 0.001.  The
**		protos of all characters are collected and handed to
**		WriteNormProtos.
**
**		NOTE(review): an earlier version of this header described
**		per-character output files
**		(DirectoryName/FontName/CharName.FeatureTypeName).  Nothing
**		in this function writes such files.
**	Return: 0 on success, 1 if no clusterer is available (either
**		SetUpForClustering returned NULL or no samples were read)
**	Exceptions: none
**	History: Fri Aug 18 08:56:17 1989, DSJ, Created.
*/
int main (
     int	argc,
     char	**argv)
{
  char *PageName;
  FILE *TrainingPage;
  LIST CharList = NIL;
  CLUSTERER *Clusterer = NULL;
  LIST ProtoList = NIL;
  LIST NormProtoList = NIL;
  LIST pCharList;
  LABELEDLIST CharSample;

  ParseArguments (argc, argv);
  while ((PageName = GetNextFilename()) != NULL) {
    printf ("Reading %s ...\n", PageName);
    // NOTE(review): the result of Efopen is used unchecked; presumably it
    // reports open failures itself -- confirm.
    TrainingPage = Efopen (PageName, "r");
    ReadTrainingSamples (TrainingPage, &CharList);
    fclose (TrainingPage);
  }
  printf("Clustering ...\n");
  pCharList = CharList;
  // NOTE(review): every iteration overwrites Clusterer and ProtoList, and
  // only the values from the last iteration are released after the loop.
  // Whether the earlier ones can be freed here depends on whether the protos
  // stored in NormProtoList still reference them -- confirm before changing.
  iterate(pCharList) {
    //Cluster
    CharSample = (LABELEDLIST) first_node (pCharList);
    Clusterer = SetUpForClustering(CharSample);
    // The clusterer is handed to ClusterSamples right below, so a NULL
    // result has to be rejected before it is used.
    if (Clusterer == NULL) {
      fprintf(stderr, "Error: NULL clusterer!\n");
      return 1;
    }
    float SavedMinSamples = Config.MinSamples;
    Config.MagicSamples = CharSample->SampleCount;
    while (Config.MinSamples > 0.001) {
      ProtoList = ClusterSamples(Clusterer, &Config);
      if (NumberOfProtos(ProtoList, 1, 0) > 0) {
        break;
      } else {
        // Nothing significant at this threshold: relax it by 5% and retry.
        Config.MinSamples *= 0.95;
        printf("0 significant protos for %s."
               " Retrying clustering with MinSamples = %f%%\n",
               CharSample->Label, Config.MinSamples);
      }
    }
    // Undo the relaxation so the next character starts from the same value.
    Config.MinSamples = SavedMinSamples;
    AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label);
  }
  FreeTrainingSamples (CharList);
  // Clusterer is still NULL here only when the loop never ran, i.e. no
  // samples were read; WriteNormProtos must not be given a NULL clusterer.
  if (Clusterer == NULL) {
    fprintf(stderr, "Error: no training samples to cluster!\n");
    return 1;
  }
  WriteNormProtos (Directory, NormProtoList, Clusterer);
  FreeClusterer(Clusterer);
  FreeProtoList(&ProtoList);
  FreeNormProtoList(NormProtoList);
  printf ("\n");
  return 0;
}	// main