示例#1
0
/**********************************************************************
 * PrintProtos
 *
 * Print the list of prototypes in this class type.
 **********************************************************************/
void PrintProtos(CLASS_TYPE Class) {
  inT16 Pid;

  for (Pid = 0; Pid < Class->NumProtos; Pid++) {
    cprintf ("Proto %d:\t", Pid);
    PrintProto (ProtoIn (Class, Pid));
    cprintf ("\t");
    PrintProtoLine (ProtoIn (Class, Pid));
    new_line();
  }
}
示例#2
0
/**
 * This routine searches thru all of the prototypes in
 * Class and returns the id of the proto which would provide
 * the best approximation of Prototype.  If no close
 * approximation can be found, NO_PROTO is returned.
 *
 * @param Class   class to search for matching old proto in
 * @param NumMerged # of protos merged into each proto of Class
 * @param  Prototype new proto to find match for
 *
 * Globals: none
 *
 * @return Id of closest proto in Class or NO_PROTO.
 * @note Exceptions: none
 * @note History: Sat Nov 24 11:42:58 1990, DSJ, Created.
 */
int FindClosestExistingProto(CLASS_TYPE Class, int NumMerged[],
                             PROTOTYPE  *Prototype) {
  PROTO_STRUCT  NewProto;
  PROTO_STRUCT  MergedProto;
  int   Pid;
  PROTO   Proto;
  int   BestProto;
  FLOAT32 BestMatch;
  FLOAT32 Match, OldMatch, NewMatch;

  MakeNewFromOld (&NewProto, Prototype);

  BestProto = NO_PROTO;
  BestMatch = WORST_MATCH_ALLOWED;
  for (Pid = 0; Pid < Class->NumProtos; Pid++) {
    Proto  = ProtoIn(Class, Pid);
    ComputeMergedProto(Proto, &NewProto,
      (FLOAT32) NumMerged[Pid], 1.0, &MergedProto);
    OldMatch = CompareProtos(Proto, &MergedProto);
    NewMatch = CompareProtos(&NewProto, &MergedProto);
    Match = MIN(OldMatch, NewMatch);
    if (Match > BestMatch) {
      BestProto = Pid;
      BestMatch = Match;
    }
  }
  return BestProto;
} /* FindClosestExistingProto */
示例#3
0
/**********************************************************************
 * ClassProtoLength
 *
 * Return the length of all the protos in this class.
 **********************************************************************/
FLOAT32 ClassProtoLength(CLASS_TYPE Class) {
  inT16 Pid;
  FLOAT32 TotalLength = 0;

  for (Pid = 0; Pid < Class->NumProtos; Pid++) {
    TotalLength += (ProtoIn (Class, Pid))->Length;
  }
  return (TotalLength);
}
示例#4
0
/**********************************************************************
 * ClassConfigLength
 *
 * Return the length of all the protos in this class.
 **********************************************************************/
FLOAT32 ClassConfigLength(CLASS_TYPE Class, BIT_VECTOR Config) {
  inT16 Pid;
  FLOAT32 TotalLength = 0;

  for (Pid = 0; Pid < Class->NumProtos; Pid++) {
    if (test_bit (Config, Pid)) {

      TotalLength += (ProtoIn (Class, Pid))->Length;
    }
  }
  return (TotalLength);
}
示例#5
0
/*---------------------------------------------------------------------------*/
int FindClosestExistingProto(CLASS_TYPE Class, int NumMerged[],
                             PROTOTYPE  *Prototype) {
/*
**  Parameters:
**    Class   class to search for matching old proto in
**    NumMerged[] # of protos merged into each proto of Class
**    Prototype new proto to find match for
**  Globals: none
**  Operation: This routine searches thru all of the prototypes in
**    Class and returns the id of the proto which would provide
**    the best approximation of Prototype.  If no close
**    approximation can be found, NO_PROTO is returned.
**  Return: Id of closest proto in Class or NO_PROTO.
**  Exceptions: none
**  History: Sat Nov 24 11:42:58 1990, DSJ, Created.
*/
  PROTO_STRUCT  NewProto;
  PROTO_STRUCT  MergedProto;
  int   Pid;
  PROTO   Proto;
  int   BestProto;
  FLOAT32 BestMatch;
  FLOAT32 Match, OldMatch, NewMatch;

  MakeNewFromOld (&NewProto, Prototype);

  BestProto = NO_PROTO;
  BestMatch = WORST_MATCH_ALLOWED;
  for (Pid = 0; Pid < Class->NumProtos; Pid++) {
    Proto  = ProtoIn(Class, Pid);
    ComputeMergedProto(Proto, &NewProto,
      (FLOAT32) NumMerged[Pid], 1.0, &MergedProto);
    OldMatch = CompareProtos(Proto, &MergedProto);
    NewMatch = CompareProtos(&NewProto, &MergedProto);
    Match = MIN(OldMatch, NewMatch);
    if (Match > BestMatch) {
      BestProto = Pid;
      BestMatch = Match;
    }
  }
  return BestProto;
} /* FindClosestExistingProto */
示例#6
0
/**********************************************************************
 * ReadProtos
 *
 * Read in all the prototype information from a file.  Read the number
 * of lines requested.
 **********************************************************************/
void ReadProtos(register FILE *File, CLASS_TYPE Class) {
  register inT16 Pid;
  register PROTO Proto;
  int NumProtos;

  fscanf (File, "%d\n", &NumProtos);
  Class->NumProtos = NumProtos;
  Class->MaxNumProtos = NumProtos;
  Class->Prototypes = (PROTO) Emalloc (sizeof (PROTO_STRUCT) * NumProtos);

  for (Pid = 0; Pid < NumProtos; Pid++) {
    Proto = ProtoIn (Class, Pid);
    fscanf (File, "%f %f %f %f %f %f %f\n",
      &Proto->X,
      &Proto->Y,
      &Proto->Length,
      &Proto->Angle,
      &Proto->A,
      &Proto->B, &Proto->C);
  }
}
/*---------------------------------------------------------------------------*/
void WriteProtos(
	FILE* File,
	MERGE_CLASS MergeClass)
{
	float Values[3];
	int i;
	PROTO Proto;

	fprintf(File, "%s\n", MergeClass->Label);
	fprintf(File, "%d\n", MergeClass->Class->NumProtos);
	for(i=0; i < MergeClass->Class->NumProtos; i++)
	{
		Proto = ProtoIn(MergeClass->Class,i);
		fprintf(File, "\t%8.4f %8.4f %8.4f %8.4f ", Proto->X, Proto->Y,
			Proto->Length, Proto->Angle);
		Values[0] = Proto->X;
		Values[1] = Proto->Y;
		Values[2] = Proto->Angle;
		Normalize(Values);
		fprintf(File, "%8.4f %8.4f %8.4f\n", Values[0], Values[1], Values[2]);
	}
} // WriteProtos
示例#8
0
/**********************************************************************
 * WriteOldProtoFile
 *
 * Write the protos in the given class to the specified file in the
 * old proto format.
 **********************************************************************/
void WriteOldProtoFile(FILE *File, CLASS_TYPE Class) {
  int Pid;
  PROTO Proto;

  /* print old header */
  fprintf (File, "6\n");
  fprintf (File, "linear   essential      -0.500000   0.500000\n");
  fprintf (File, "linear   essential      -0.250000   0.750000\n");
  fprintf (File, "linear   essential       0.000000   1.000000\n");
  fprintf (File, "circular essential       0.000000   1.000000\n");
  fprintf (File, "linear   non-essential  -0.500000   0.500000\n");
  fprintf (File, "linear   non-essential  -0.500000   0.500000\n");

  for (Pid = 0; Pid < Class->NumProtos; Pid++) {
    Proto = ProtoIn (Class, Pid);

    fprintf (File, "significant   elliptical   1\n");
    fprintf (File, "     %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f\n",
      Proto->X, Proto->Y,
      Proto->Length, Proto->Angle, 0.0, 0.0);
    fprintf (File, "     %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f\n",
      0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001);
  }
}
/*---------------------------------------------------------------------------*/
int main (int argc, char **argv) {
/*
**	Parameters:
**		argc	number of command line arguments
**		argv	array of command line arguments
**	Globals: none
**	Operation:
**		This program reads in a text file consisting of feature
**		samples from a training page in the following format:
**
**			FontName CharName NumberOfFeatureTypes(N)
**			   FeatureTypeName1 NumberOfFeatures(M)
**			      Feature1
**			      ...
**			      FeatureM
**			   FeatureTypeName2 NumberOfFeatures(M)
**			      Feature1
**			      ...
**			      FeatureM
**			   ...
**			   FeatureTypeNameN NumberOfFeatures(M)
**			      Feature1
**			      ...
**			      FeatureM
**			FontName CharName ...
**
**		The result of this program is a binary inttemp file used by
**		the OCR engine.
**	Return: none
**	Exceptions: none
**	History:	Fri Aug 18 08:56:17 1989, DSJ, Created.
**				Mon May 18 1998, Christy Russson, Revistion started.
*/
  char	*PageName;
  FILE	*TrainingPage;
  FILE	*OutFile;
  LIST	CharList;
  CLUSTERER	*Clusterer = NULL;
  LIST		ProtoList = NIL;
  LABELEDLIST CharSample;
  PROTOTYPE	*Prototype;
  LIST   	ClassList = NIL;
  int		Cid, Pid;
  PROTO		Proto;
  PROTO_STRUCT	DummyProto;
  BIT_VECTOR	Config2;
  MERGE_CLASS	MergeClass;
  INT_TEMPLATES	IntTemplates;
  LIST pCharList, pProtoList;
  char Filename[MAXNAMESIZE];
  tesseract::Classify classify;

  ParseArguments (argc, argv);
  if (InputUnicharsetFile == NULL) {
    InputUnicharsetFile = kInputUnicharsetFile;
  }
  if (OutputUnicharsetFile == NULL) {
    OutputUnicharsetFile = kOutputUnicharsetFile;
  }

  if (!unicharset_training.load_from_file(InputUnicharsetFile)) {
    fprintf(stderr, "Failed to load unicharset from file %s\n"
            "Building unicharset for mftraining from scratch...\n",
            InputUnicharsetFile);
    unicharset_training.clear();
    // Space character needed to represent NIL classification.
    unicharset_training.unichar_insert(" ");
  }


  if (InputFontInfoFile != NULL) {
    FILE* f = fopen(InputFontInfoFile, "r");
    if (f == NULL) {
      fprintf(stderr, "Failed to load font_properties\n");
    } else {
      int italic, bold, fixed, serif, fraktur;
      while (!feof(f)) {
        FontInfo fontinfo;
        fontinfo.name = new char[1024];
        fontinfo.properties = 0;
        if (fscanf(f, "%1024s %i %i %i %i %i\n", fontinfo.name,
                   &italic, &bold, &fixed, &serif, &fraktur) != 6)
          continue;
        fontinfo.properties =
            (italic << 0) +
            (bold << 1) +
            (fixed << 2) +
            (serif << 3) +
            (fraktur << 4);
        if (!classify.get_fontinfo_table().contains(fontinfo)) {
          classify.get_fontinfo_table().push_back(fontinfo);
        } else {
          fprintf(stderr, "Font %s already defined\n", fontinfo.name);
          return 1;
        }
      }
      fclose(f);
    }
  }

  while ((PageName = GetNextFilename(argc, argv)) != NULL) {
    printf ("Reading %s ...\n", PageName);
    char *short_name = strrchr(PageName, '/');
    if (short_name == NULL)
      short_name = PageName;
    else
      ++short_name;
    // filename is expected to be of the form [lang].[fontname].exp[num].tr
    // If it is, then set short_name to be the [fontname]. Otherwise it is just
    // the file basename with the .tr extension removed.
    char *font_dot = strchr(short_name, '.');
    char *exp_dot = (font_dot != NULL) ? strstr(font_dot, ".exp") : NULL;
    if (font_dot != NULL && exp_dot != NULL && font_dot != exp_dot) {
      short_name = new_dup(font_dot + 1);
      short_name[exp_dot - font_dot - 1] = '\0';
    } else {
      short_name = new_dup(short_name);
      int len = strlen(short_name);
      if (!strcmp(short_name + len - 3, ".tr"))
        short_name[len - 3] = '\0';
    }
    int fontinfo_id;
    FontInfo fontinfo;
    fontinfo.name = short_name;
    fontinfo.properties = 0;  // Not used to lookup in the table
    if (!classify.get_fontinfo_table().contains(fontinfo)) {
      fontinfo_id = classify.get_fontinfo_table().push_back(fontinfo);
      printf("%s has no defined properties.\n", short_name);
    } else {
      fontinfo_id = classify.get_fontinfo_table().get_id(fontinfo);
      // Update the properties field
      fontinfo = classify.get_fontinfo_table().get(fontinfo_id);
      delete[] short_name;
    }
    TrainingPage = Efopen (PageName, "r");
    CharList = ReadTrainingSamples (TrainingPage);
    fclose (TrainingPage);
    //WriteTrainingSamples (Directory, CharList);
    pCharList = CharList;
    iterate(pCharList) {
      //Cluster
      CharSample = (LABELEDLIST) first_node (pCharList);
//    printf ("\nClustering %s ...", CharSample->Label);
      Clusterer = SetUpForClustering(CharSample, PROGRAM_FEATURE_TYPE);
      Config.MagicSamples = CharSample->SampleCount;
      ProtoList = ClusterSamples(Clusterer, &Config);
      CleanUpUnusedData(ProtoList);

      //Merge
      MergeInsignificantProtos(ProtoList, CharSample->Label,
                               Clusterer, &Config);
      if (strcmp(test_ch, CharSample->Label) == 0)
        DisplayProtoList(test_ch, ProtoList);
      ProtoList = RemoveInsignificantProtos(ProtoList, ShowSignificantProtos,
                                            ShowInsignificantProtos,
                                            Clusterer->SampleSize);
      FreeClusterer(Clusterer);
      MergeClass = FindClass (ClassList, CharSample->Label);
      if (MergeClass == NULL) {
        MergeClass = NewLabeledClass (CharSample->Label);
        ClassList = push (ClassList, MergeClass);
      }
      Cid = AddConfigToClass(MergeClass->Class);
      MergeClass->Class->font_set.push_back(fontinfo_id);
      pProtoList = ProtoList;
      iterate (pProtoList) {
        Prototype = (PROTOTYPE *) first_node (pProtoList);

        // see if proto can be approximated by existing proto
        Pid = FindClosestExistingProto(MergeClass->Class,
                                       MergeClass->NumMerged, Prototype);
        if (Pid == NO_PROTO) {
          Pid = AddProtoToClass (MergeClass->Class);
          Proto = ProtoIn (MergeClass->Class, Pid);
          MakeNewFromOld (Proto, Prototype);
          MergeClass->NumMerged[Pid] = 1;
        }
        else {
          MakeNewFromOld (&DummyProto, Prototype);
          ComputeMergedProto (ProtoIn (MergeClass->Class, Pid), &DummyProto,
              (FLOAT32) MergeClass->NumMerged[Pid], 1.0,
              ProtoIn (MergeClass->Class, Pid));
          MergeClass->NumMerged[Pid] ++;
        }
        Config2 = MergeClass->Class->Configurations[Cid];
        AddProtoToConfig (Pid, Config2);
      }
      FreeProtoList (&ProtoList);
    }
    FreeTrainingSamples (CharList);
  }
  //WriteMergedTrainingSamples(Directory,ClassList);
  WriteMicrofeat(Directory, ClassList);
  SetUpForFloat2Int(ClassList);
  IntTemplates = classify.CreateIntTemplates(TrainingData,
                                             unicharset_training);
  strcpy (Filename, "");
  if (Directory != NULL) {
    strcat (Filename, Directory);
    strcat (Filename, "/");
  }
  strcat (Filename, "inttemp");
#ifdef __UNIX__
  OutFile = Efopen (Filename, "w");
#else
  OutFile = Efopen (Filename, "wb");
#endif
  classify.WriteIntTemplates(OutFile, IntTemplates, unicharset_training);
  fclose (OutFile);
  strcpy (Filename, "");
  if (Directory != NULL) {
    strcat (Filename, Directory);
    strcat (Filename, "/");
  }
  strcat (Filename, "pffmtable");
  // Now create pffmtable.
  WritePFFMTable(IntTemplates, Filename);
  // Write updated unicharset to a file.
  if (!unicharset_training.save_to_file(OutputUnicharsetFile)) {
    fprintf(stderr, "Failed to save unicharset to file %s\n",
            OutputUnicharsetFile);
    exit(1);
  }
  printf ("Done!\n"); /**/
  FreeLabeledClassList (ClassList);
  return 0;
}	/* main */
/** SetUpForFloat2Int **************************************************/
void SetUpForFloat2Int(
    LIST LabeledClassList)
{
  MERGE_CLASS	MergeClass;
  CLASS_TYPE		Class;
  int				NumProtos;
  int				NumConfigs;
  int				NumWords;
  int				i, j;
  float			Values[3];
  PROTO			NewProto;
  PROTO			OldProto;
  BIT_VECTOR		NewConfig;
  BIT_VECTOR		OldConfig;

  // 	printf("Float2Int ...\n");

  iterate(LabeledClassList)
  {
    UnicityTableEqEq<int>   font_set;
    MergeClass = (MERGE_CLASS) first_node (LabeledClassList);
    Class = &TrainingData[unicharset_training.unichar_to_id(
        MergeClass->Label)];
    NumProtos = MergeClass->Class->NumProtos;
    NumConfigs = MergeClass->Class->NumConfigs;
    font_set.move(&MergeClass->Class->font_set);
    Class->NumProtos = NumProtos;
    Class->MaxNumProtos = NumProtos;
    Class->Prototypes = (PROTO) Emalloc (sizeof(PROTO_STRUCT) * NumProtos);
    for(i=0; i < NumProtos; i++)
    {
      NewProto = ProtoIn(Class, i);
      OldProto = ProtoIn(MergeClass->Class, i);
      Values[0] = OldProto->X;
      Values[1] = OldProto->Y;
      Values[2] = OldProto->Angle;
      Normalize(Values);
      NewProto->X = OldProto->X;
      NewProto->Y = OldProto->Y;
      NewProto->Length = OldProto->Length;
      NewProto->Angle = OldProto->Angle;
      NewProto->A = Values[0];
      NewProto->B = Values[1];
      NewProto->C = Values[2];
    }

    Class->NumConfigs = NumConfigs;
    Class->MaxNumConfigs = NumConfigs;
    Class->font_set.move(&font_set);
    Class->Configurations = (BIT_VECTOR*) Emalloc (sizeof(BIT_VECTOR) * NumConfigs);
    NumWords = WordsInVectorOfSize(NumProtos);
    for(i=0; i < NumConfigs; i++)
    {
      NewConfig = NewBitVector(NumProtos);
      OldConfig = MergeClass->Class->Configurations[i];
      for(j=0; j < NumWords; j++)
        NewConfig[j] = OldConfig[j];
      Class->Configurations[i] = NewConfig;
    }
  }
} // SetUpForFloat2Int