コード例 #1
0
// Prints the command-line synopsis and terminates with exit status 1.
static void PrintUsageAndExit(const char* program_name) {
  printf("Usage: %s [-D DIRECTORY] FILE...\n", program_name);
  exit(1);
}

// Builds a unicharset from the box files named on the command line and
// writes it to DIRECTORY/kUnicharsetFileName.
//
// Usage: program [-D DIRECTORY] FILE...
//   -D DIRECTORY  directory for the unicharset file (defaults to ".").
//   FILE...       box files, read entry by entry with ReadNextBox.
//
// Returns 0 on success and -1 when a box file cannot be opened or the
// unicharset cannot be saved. Exits with status 1 when called without
// arguments, with an unrecognised option, or with -D but no directory.
int main(int argc, char** argv) {
  int option;
  const char* output_directory = ".";
  STRING unicharset_file_name;
  // Special characters are now included by default.
  UNICHARSET unicharset;

  setlocale(LC_ALL, "");

  // Print usage
  if (argc <= 1) {
    PrintUsageAndExit(argv[0]);
  }

  // Parse arguments
  while ((option = tessopt(argc, argv, "D" )) != EOF) {
    switch (option) {
      case 'D':
        // "D" is declared without ':', so the directory is taken from
        // tessoptarg and tessoptind is stepped past it by hand.
        // NOTE(review): this relies on tessopt leaving the following
        // argument in tessoptarg -- confirm against tessopt.
        if (tessoptarg == NULL) {
          // -D was the last argument: there is no directory to use.
          PrintUsageAndExit(argv[0]);
        }
        output_directory = tessoptarg;
        ++tessoptind;
        break;
      default:
        // Unrecognised option: stop instead of silently ignoring it.
        PrintUsageAndExit(argv[0]);
        break;
    }
  }

  // Save file name
  unicharset_file_name = output_directory;
  unicharset_file_name += "/";
  unicharset_file_name += kUnicharsetFileName;

  // Load box files
  for (; tessoptind < argc; ++tessoptind) {
    printf("Extracting unicharset from %s\n", argv[tessoptind]);

    FILE* box_file = fopen(argv[tessoptind], "rb");
    if (box_file == NULL) {
      printf("Cannot open box file %s\n", argv[tessoptind]);
      return -1;
    }

    TBOX box;
    STRING unichar_string;
    int line_number = 0;
    while (ReadNextBox(&line_number, box_file, &unichar_string, &box)) {
      unicharset.unichar_insert(unichar_string.string());
      set_properties(&unicharset, unichar_string.string());
    }

    // Release the handle before moving on; otherwise every input file
    // stays open until the process ends.
    fclose(box_file);
  }

  // Write unicharset file
  if (unicharset.save_to_file(unicharset_file_name.string())) {
    printf("Wrote unicharset file %s.\n", unicharset_file_name.string());
  }
  else {
    printf("Cannot save unicharset file %s.\n", unicharset_file_name.string());
    return -1;
  }
  return 0;
}
コード例 #2
0
/*---------------------------------------------------------------------------*/
void ParseArguments(int argc, char **argv)
/*
 **	Parameters:
 **		argc	number of command line arguments to parse
 **		argv	command line arguments
 **	Globals:
 **		ShowSignificantProtos	set from -p
 **		ShowInsignificantProtos	set from -n
 **		Config			clustering parameters (ProtoStyle,
 **					MinSamples, MaxIllegal, Independence,
 **					Confidence)
 **		RoundingAccuracy	set from -R
 **		Directory		set from -D
 **		InputUnicharsetFile	set from -U
 **		OutputUnicharsetFile	set from -O
 **		InputFontInfoFile	set from -F
 **		tessoptarg		argument of the current option, as
 **					supplied by tessopt
 **	Operation:
 **		This routine parses the command line options that were
 **		passed to the program.  Every option takes an argument:
 **			-p Value	integer stored in ShowSignificantProtos
 **			-n Value	integer stored in ShowInsignificantProtos
 **			-S [ spherical | elliptical | mixed | automatic ]
 **					(only the first letter is examined)
 **			-M MinSamples	clamped to 0..1
 **			-B MaxIllegal	clamped to 0..1
 **			-I Independence	clamped to 0..1
 **			-C Confidence	clamped to 0..1
 **			-R RoundingAccuracy	clamped to 0..0.01
 **			-D Directory
 **			-U InputUnicharsetFile
 **			-O OutputUnicharsetFile
 **			-F InputFontInfoFile
 **	Return: none
 **	Exceptions: An illegal option, a missing argument or an argument
 **		that cannot be parsed prints the usage text to stderr and
 **		terminates the program with exit status 2.
 **	History: 7/24/89, DSJ, Created.
 */

{
  int   Option;
  int   FlagValue = 0;
  int   ParametersRead;
  BOOL8 Error;

  Error = FALSE;
  // "p:" (not "p"): the -p handler reads tessoptarg just like -n does, so
  // it has to be declared as taking an argument.
  while (( Option = tessopt( argc, argv, "F:O:U:R:D:C:I:M:B:S:n:p:" )) != EOF )
  {
    if ( Option == '?' || tessoptarg == NULL )
    {
      // Unknown option, or an option whose argument is missing; every
      // handler below dereferences tessoptarg.
      Error = TRUE;
    }
    else
    {
      switch ( Option )
      {
        case 'n':
          // Only store the value when a number was actually parsed.
          ParametersRead = sscanf( tessoptarg, "%d", &FlagValue );
          if ( ParametersRead != 1 ) Error = TRUE;
          else ShowInsignificantProtos = FlagValue;
          break;
        case 'p':
          ParametersRead = sscanf( tessoptarg, "%d", &FlagValue );
          if ( ParametersRead != 1 ) Error = TRUE;
          else ShowSignificantProtos = FlagValue;
          break;
        case 'C':
          // NOTE(review): "%lf" assumes Config.Confidence is a double --
          // confirm against the declaration of Config.
          ParametersRead = sscanf( tessoptarg, "%lf", &(Config.Confidence) );
          if ( ParametersRead != 1 ) Error = TRUE;
          else if ( Config.Confidence > 1 ) Config.Confidence = 1;
          else if ( Config.Confidence < 0 ) Config.Confidence = 0;
          break;
        case 'I':
          ParametersRead = sscanf( tessoptarg, "%f", &(Config.Independence) );
          if ( ParametersRead != 1 ) Error = TRUE;
          else if ( Config.Independence > 1 ) Config.Independence = 1;
          else if ( Config.Independence < 0 ) Config.Independence = 0;
          break;
        case 'M':
          ParametersRead = sscanf( tessoptarg, "%f", &(Config.MinSamples) );
          if ( ParametersRead != 1 ) Error = TRUE;
          else if ( Config.MinSamples > 1 ) Config.MinSamples = 1;
          else if ( Config.MinSamples < 0 ) Config.MinSamples = 0;
          break;
        case 'B':
          ParametersRead = sscanf( tessoptarg, "%f", &(Config.MaxIllegal) );
          if ( ParametersRead != 1 ) Error = TRUE;
          else if ( Config.MaxIllegal > 1 ) Config.MaxIllegal = 1;
          else if ( Config.MaxIllegal < 0 ) Config.MaxIllegal = 0;
          break;
        case 'R':
          ParametersRead = sscanf( tessoptarg, "%f", &RoundingAccuracy );
          if ( ParametersRead != 1 ) Error = TRUE;
          else if ( RoundingAccuracy > 0.01f ) RoundingAccuracy = 0.01f;
          else if ( RoundingAccuracy < 0.0f ) RoundingAccuracy = 0.0f;
          break;
        case 'S':
          // The style is selected by the first character of the argument.
          switch ( tessoptarg[0] )
          {
            case 's': Config.ProtoStyle = spherical; break;
            case 'e': Config.ProtoStyle = elliptical; break;
            case 'm': Config.ProtoStyle = mixed; break;
            case 'a': Config.ProtoStyle = automatic; break;
            default: Error = TRUE;
          }
          break;
        case 'D':
          Directory = tessoptarg;
          break;
        case 'U':
          InputUnicharsetFile = tessoptarg;
          break;
        case 'O':
          OutputUnicharsetFile = tessoptarg;
          break;
        case 'F':
          InputFontInfoFile = tessoptarg;
          break;
        default:
          break;
      }
    }
    if ( Error )
    {
      fprintf (stderr, "usage: %s [-p ShowSignificantProtos] [-n ShowInsignificantProtos]\n", argv[0] );
      fprintf (stderr, "\t[-S ProtoStyle]\n");
      fprintf (stderr, "\t[-M MinSamples] [-B MaxBad] [-I Independence]\n");
      fprintf (stderr, "\t[-C Confidence] [-D Directory]\n");
      fprintf (stderr, "\t[-R RoundingAccuracy]\n");
      fprintf (stderr, "\t[-U InputUnicharsetFile] [-O OutputUnicharsetFile]\n");
      fprintf (stderr, "\t[-F FontInfoFile]\n");
      fprintf (stderr, "\t[ TrainingPage ... ]\n");
      exit (2);
    }
  }
}	// ParseArguments