/** * This program reads in a text file consisting of feature * samples from a training page in the following format: * @verbatim FontName CharName NumberOfFeatureTypes(N) FeatureTypeName1 NumberOfFeatures(M) Feature1 ... FeatureM FeatureTypeName2 NumberOfFeatures(M) Feature1 ... FeatureM ... FeatureTypeNameN NumberOfFeatures(M) Feature1 ... FeatureM FontName CharName ... @endverbatim * It then appends these samples into a separate file for each * character. The name of the file is * * DirectoryName/FontName/CharName.FeatureTypeName * * The DirectoryName can be specified via a command * line argument. If not specified, it defaults to the * current directory. The format of the resulting files is: * @verbatim NumberOfFeatures(M) Feature1 ... FeatureM NumberOfFeatures(M) ... @endverbatim * The output files each have a header which describes the * type of feature which the file contains. This header is * in the format required by the clusterer. A command line * argument can also be used to specify that only the first * N samples of each class should be used. * @param argc number of command line arguments * @param argv array of command line arguments * @return none * @note Globals: none * @note Exceptions: none * @note History: Fri Aug 18 08:56:17 1989, DSJ, Created. */ int main(int argc, char *argv[]) { // Set the global Config parameters before parsing the command line. Config = CNConfig; const char *PageName; FILE *TrainingPage; LIST CharList = NIL_LIST; CLUSTERER *Clusterer = NULL; LIST ProtoList = NIL_LIST; LIST NormProtoList = NIL_LIST; LIST pCharList; LABELEDLIST CharSample; FEATURE_DEFS_STRUCT FeatureDefs; InitFeatureDefs(&FeatureDefs); ParseArguments(&argc, &argv); int num_fonts = 0; while ((PageName = GetNextFilename(argc, argv)) != NULL) { printf("Reading %s ...\n", PageName); TrainingPage = Efopen(PageName, "rb"); ReadTrainingSamples(FeatureDefs, PROGRAM_FEATURE_TYPE, 100, NULL, TrainingPage, &CharList); fclose(TrainingPage); ++num_fonts; } printf("Clustering ...\n"); // To allow an individual font to form a separate cluster, // reduce the min samples: // Config.MinSamples = 0.5 / num_fonts; pCharList = CharList; // The norm protos will count the source protos, so we keep them here in // freeable_protos, so they can be freed later. GenericVector<LIST> freeable_protos; iterate(pCharList) { //Cluster CharSample = (LABELEDLIST)first_node(pCharList); Clusterer = SetUpForClustering(FeatureDefs, CharSample, PROGRAM_FEATURE_TYPE); if (Clusterer == NULL) { // To avoid a SIGSEGV fprintf(stderr, "Error: NULL clusterer!\n"); return 1; } float SavedMinSamples = Config.MinSamples; // To disable the tendency to produce a single cluster for all fonts, // make MagicSamples an impossible to achieve number: // Config.MagicSamples = CharSample->SampleCount * 10; Config.MagicSamples = CharSample->SampleCount; while (Config.MinSamples > 0.001) { ProtoList = ClusterSamples(Clusterer, &Config); if (NumberOfProtos(ProtoList, 1, 0) > 0) { break; } else { Config.MinSamples *= 0.95; printf("0 significant protos for %s." " Retrying clustering with MinSamples = %f%%\n", CharSample->Label, Config.MinSamples); } } Config.MinSamples = SavedMinSamples; AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label); freeable_protos.push_back(ProtoList); FreeClusterer(Clusterer); } FreeTrainingSamples(CharList); int desc_index = ShortNameToFeatureType(FeatureDefs, PROGRAM_FEATURE_TYPE); WriteNormProtos(FLAGS_D.c_str(), NormProtoList, FeatureDefs.FeatureDesc[desc_index]); FreeNormProtoList(NormProtoList); for (int i = 0; i < freeable_protos.size(); ++i) { FreeProtoList(&freeable_protos[i]); } printf ("\n"); return 0; } // main
/*---------------------------------------------------------------------------*/ int main ( int argc, char **argv) /* ** Parameters: ** argc number of command line arguments ** argv array of command line arguments ** Globals: none ** Operation: ** This program reads in a text file consisting of feature ** samples from a training page in the following format: ** ** FontName CharName NumberOfFeatureTypes(N) ** FeatureTypeName1 NumberOfFeatures(M) ** Feature1 ** ... ** FeatureM ** FeatureTypeName2 NumberOfFeatures(M) ** Feature1 ** ... ** FeatureM ** ... ** FeatureTypeNameN NumberOfFeatures(M) ** Feature1 ** ... ** FeatureM ** FontName CharName ... ** ** It then appends these samples into a separate file for each ** character. The name of the file is ** ** DirectoryName/FontName/CharName.FeatureTypeName ** ** The DirectoryName can be specified via a command ** line argument. If not specified, it defaults to the ** current directory. The format of the resulting files is: ** ** NumberOfFeatures(M) ** Feature1 ** ... ** FeatureM ** NumberOfFeatures(M) ** ... ** ** The output files each have a header which describes the ** type of feature which the file contains. This header is ** in the format required by the clusterer. A command line ** argument can also be used to specify that only the first ** N samples of each class should be used. ** Return: none ** Exceptions: none ** History: Fri Aug 18 08:56:17 1989, DSJ, Created. */ { char *PageName; FILE *TrainingPage; LIST CharList = NIL_LIST; CLUSTERER *Clusterer = NULL; LIST ProtoList = NIL_LIST; LIST NormProtoList = NIL_LIST; LIST pCharList; LABELEDLIST CharSample; FEATURE_DEFS_STRUCT FeatureDefs; InitFeatureDefs(&FeatureDefs); ParseArguments(argc, argv); int num_fonts = 0; while ((PageName = GetNextFilename(argc, argv)) != NULL) { printf("Reading %s ...\n", PageName); TrainingPage = Efopen(PageName, "r"); ReadTrainingSamples(FeatureDefs, PROGRAM_FEATURE_TYPE, 100, 1.0f / 64.0f, 0.0f, NULL, TrainingPage, &CharList); fclose(TrainingPage); ++num_fonts; } printf("Clustering ...\n"); // To allow an individual font to form a separate cluster, // reduce the min samples: // Config.MinSamples = 0.5 / num_fonts; pCharList = CharList; iterate(pCharList) { //Cluster CharSample = (LABELEDLIST)first_node(pCharList); Clusterer = SetUpForClustering(FeatureDefs, CharSample, PROGRAM_FEATURE_TYPE); float SavedMinSamples = Config.MinSamples; // To disable the tendency to produce a single cluster for all fonts, // make MagicSamples an impossible to achieve number: // Config.MagicSamples = CharSample->SampleCount * 10; Config.MagicSamples = CharSample->SampleCount; while (Config.MinSamples > 0.001) { ProtoList = ClusterSamples(Clusterer, &Config); if (NumberOfProtos(ProtoList, 1, 0) > 0) { break; } else { Config.MinSamples *= 0.95; printf("0 significant protos for %s." " Retrying clustering with MinSamples = %f%%\n", CharSample->Label, Config.MinSamples); } } Config.MinSamples = SavedMinSamples; AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label); } FreeTrainingSamples(CharList); if (Clusterer == NULL) // To avoid a SIGSEGV return 1; WriteNormProtos (Directory, NormProtoList, Clusterer); FreeClusterer(Clusterer); FreeProtoList(&ProtoList); FreeNormProtoList(NormProtoList); printf ("\n"); return 0; } // main
/*---------------------------------------------------------------------------*/ int main ( int argc, char **argv) /* ** Parameters: ** argc number of command line arguments ** argv array of command line arguments ** Globals: none ** Operation: ** This program reads in a text file consisting of feature ** samples from a training page in the following format: ** ** FontName CharName NumberOfFeatureTypes(N) ** FeatureTypeName1 NumberOfFeatures(M) ** Feature1 ** ... ** FeatureM ** FeatureTypeName2 NumberOfFeatures(M) ** Feature1 ** ... ** FeatureM ** ... ** FeatureTypeNameN NumberOfFeatures(M) ** Feature1 ** ... ** FeatureM ** FontName CharName ... ** ** It then appends these samples into a separate file for each ** character. The name of the file is ** ** DirectoryName/FontName/CharName.FeatureTypeName ** ** The DirectoryName can be specified via a command ** line argument. If not specified, it defaults to the ** current directory. The format of the resulting files is: ** ** NumberOfFeatures(M) ** Feature1 ** ... ** FeatureM ** NumberOfFeatures(M) ** ... ** ** The output files each have a header which describes the ** type of feature which the file contains. This header is ** in the format required by the clusterer. A command line ** argument can also be used to specify that only the first ** N samples of each class should be used. ** Return: none ** Exceptions: none ** History: Fri Aug 18 08:56:17 1989, DSJ, Created. */ { char *PageName; FILE *TrainingPage; LIST CharList = NIL; CLUSTERER *Clusterer = NULL; LIST ProtoList = NIL; LIST NormProtoList = NIL; LIST pCharList; LABELEDLIST CharSample; ParseArguments (argc, argv); while ((PageName = GetNextFilename()) != NULL) { printf ("Reading %s ...\n", PageName); TrainingPage = Efopen (PageName, "r"); ReadTrainingSamples (TrainingPage, &CharList); fclose (TrainingPage); //WriteTrainingSamples (Directory, CharList); } printf("Clustering ...\n"); pCharList = CharList; iterate(pCharList) { //Cluster CharSample = (LABELEDLIST) first_node (pCharList); //printf ("\nClustering %s ...", CharSample->Label); Clusterer = SetUpForClustering(CharSample); float SavedMinSamples = Config.MinSamples; Config.MagicSamples = CharSample->SampleCount; while (Config.MinSamples > 0.001) { ProtoList = ClusterSamples(Clusterer, &Config); if (NumberOfProtos(ProtoList, 1, 0) > 0) break; else { Config.MinSamples *= 0.95; printf("0 significant protos for %s." " Retrying clustering with MinSamples = %f%%\n", CharSample->Label, Config.MinSamples); } } Config.MinSamples = SavedMinSamples; AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label); } FreeTrainingSamples (CharList); WriteNormProtos (Directory, NormProtoList, Clusterer); FreeClusterer(Clusterer); FreeProtoList(&ProtoList); FreeNormProtoList(NormProtoList); printf ("\n"); return 0; } // main