Exemple #1
0
int main(int argc, char *argv[])
{
   char * labFn, *listfn, *s;
   int i,fidx;
   MLFEntry *me = NULL;
   Transcription *t;
   void InitStats(char *listfn);
   void GatherStats(Transcription *t);
   void OutputStats(void);

   if(InitShell(argc,argv,hlstats_version,hlstats_vc_id)<SUCCESS)
      HError(1300,"HLStats: InitShell failed");

   InitMem();   InitMath();
   InitWave();  InitLabel();
   InitLM();

   if (!InfoPrinted() && NumArgs() == 0)
      ReportUsage();
   if (NumArgs() == 0) Exit(0);
   SetConfParms();
   
   enterId=GetLabId("!ENTER",TRUE); /* All sentences should or are coerced */
   exitId=GetLabId("!EXIT",TRUE);   /*  to start enterId and end exitId */
   nullId=GetLabId("!NULL",TRUE);  /* Name for words not in list */

   while (NextArg() == SWITCHARG) {
      s = GetSwtArg();
      if (strlen(s)!=1) 
         HError(1319,"HLStats: Bad switch %s; must be single letter",s);
      switch(s[0]){
      case 'b':
         doBigram = TRUE;
         if (NextArg() != STRINGARG)
            HError(1319,"HLStats: Ngram output file name expected");
         bigFile = GetStrArg();
         break;
      case 'c':
         doLCount = TRUE;
         lCountLimit = GetChkedInt(0,100000,s);
         break;
      case 'd':
         doDurs = TRUE; break;
      case 'f':
         bigFloor = GetChkedFlt(0.0,1000.0,s);
         break;
      case 'h':
         hSize =  GetChkedInt(1,2,s);
         break;
      case 'l':
         doList = TRUE; 
         if (NextArg() != STRINGARG)
            HError(1319,"HLStats: Output label list file name expected");
         listFile = GetStrArg();
         break;
      case 'o':
         doBOff = TRUE;
         break;
      case 'p':
         doPCount = TRUE;
         pCountLimit = GetChkedInt(0,100000,s);
         break;
      case 's':
         if (NextArg() != STRINGARG)
            HError(1319,"HLStats: ENTER label name expected");
         enterId=GetLabId(GetStrArg(),TRUE);
         if (NextArg() != STRINGARG)
            HError(1319,"HLStats: EXIT label name expected");
         exitId=GetLabId(GetStrArg(),TRUE);
         break;
      case 't':
         bigThresh = GetChkedInt(0,100,s);
         break;
      case 'u':
         uniFloor = GetChkedFlt(0.0,1000.0,s);
         break;
      case 'G':
         if (NextArg() != STRINGARG)
            HError(1319,"HLStats: Input label File format expected");
         if((ff = Str2Format(GetStrArg())) == ALIEN)
            HError(-1389,"HLStats: Warning ALIEN Label file format set");
         break;
      case 'I':
         if (NextArg() != STRINGARG)
            HError(1319,"HLStats: Input MLF file name expected");
         LoadMasterFile(GetStrArg());
         break;
      case 'T':
         if (NextArg() != INTARG)
            HError(1319,"HLStats: Trace value expected");
         trace = GetChkedInt(0,017,s); break;
      default:
         HError(1319,"HLStats: Unknown switch %s",s);
      }
   }

   if (NextArg()!=STRINGARG)
      HError(1319,"HLStats: Label list file name expected");
   listfn = GetStrArg();
   if (!(doDurs || doBigram || doList || doLCount || doPCount))
      HError(1330,"HLStats: Nothing to do!");
   InitStats(listfn);

   i=0;
   while (NumArgs()>0) {
      if (NextArg()!=STRINGARG)
         HError(1319,"HLStats: Input label file name expected");
      labFn = GetStrArg();
      if (IsMLFFile(labFn)) {
         fidx = NumMLFFiles();
         if ((me=GetMLFTable()) != NULL) {
            while(me->next != NULL) me=me->next;
            LoadMasterFile(labFn);
            me=me->next;
         }
         else {
            LoadMasterFile(labFn);
            me=GetMLFTable();
         }
         while (me != NULL) {
            if (me->type == MLF_IMMEDIATE && me->def.immed.fidx == fidx) {
               if (trace&T_FIL) {
                  printf("  Processing file %s\n",me->pattern); fflush(stdout);
               }
               t = LOpen(&tmpHeap,me->pattern,ff);
               if (t->numLists<1)
                  HError(-1330,"HLStats: Empty file %s",me->pattern);
               else
                  GatherStats(t),i++;

               Dispose(&tmpHeap,t);
            }
            me = me->next;
            if ((trace&T_BAS) && !(trace&T_FIL) &&
                NumMLFEntries()>5000 && i%1000==0) 
               printf(". "),fflush(stdout);
         }
         if ((trace&T_BAS) && !(trace&T_FIL) && NumMLFEntries()>5000)
            printf("\n");
      } else {
         if (trace&T_FIL) {
            printf("  Processing file %s\n",labFn); fflush(stdout);
         }
         t = LOpen(&tmpHeap,labFn,ff);
         if (t->numLists<1)
            HError(-1330,"HLStats: Empty file %s",me->pattern);
         else
            GatherStats(t),i++;
         Dispose(&tmpHeap,t);
      }
   }
   if (trace&T_MEM)
      PrintAllHeapStats();
   OutputStats();

   if (trace&T_MEM)
      PrintAllHeapStats();
   Exit(0);
   return (0);          /* never reached -- make compiler happy */
}
// Recognizes the image_data, returning the labels,
// scores, and corresponding pairs of start, end x-coords in coords.
bool LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert,
                                   bool debug, bool re_invert,
                                   float* scale_factor, NetworkIO* inputs,
                                   NetworkIO* outputs) {
  // Maximum width of image to train on.
  const int kMaxImageWidth = 2560;
  // This ensures consistent recognition results.
  SetRandomSeed();
  int min_width = network_->XScaleFactor();
  Pix* pix = Input::PrepareLSTMInputs(image_data, network_, min_width,
                                      &randomizer_, scale_factor);
  if (pix == NULL) {
    tprintf("Line cannot be recognized!!\n");
    return false;
  }
  if (network_->IsTraining() && pixGetWidth(pix) > kMaxImageWidth) {
    tprintf("Image too large to learn!! Size = %dx%d\n", pixGetWidth(pix),
            pixGetHeight(pix));
    pixDestroy(&pix);
    return false;
  }
  // Reduction factor from image to coords.
  *scale_factor = min_width / *scale_factor;
  inputs->set_int_mode(IsIntMode());
  SetRandomSeed();
  Input::PreparePixInput(network_->InputShape(), pix, &randomizer_, inputs);
  network_->Forward(debug, *inputs, NULL, &scratch_space_, outputs);
  // Check for auto inversion.
  float pos_min, pos_mean, pos_sd;
  OutputStats(*outputs, &pos_min, &pos_mean, &pos_sd);
  if (invert && pos_min < 0.5) {
    // Run again inverted and see if it is any better.
    NetworkIO inv_inputs, inv_outputs;
    inv_inputs.set_int_mode(IsIntMode());
    SetRandomSeed();
    pixInvert(pix, pix);
    Input::PreparePixInput(network_->InputShape(), pix, &randomizer_,
                           &inv_inputs);
    network_->Forward(debug, inv_inputs, NULL, &scratch_space_, &inv_outputs);
    float inv_min, inv_mean, inv_sd;
    OutputStats(inv_outputs, &inv_min, &inv_mean, &inv_sd);
    if (inv_min > pos_min && inv_mean > pos_mean && inv_sd < pos_sd) {
      // Inverted did better. Use inverted data.
      if (debug) {
        tprintf("Inverting image: old min=%g, mean=%g, sd=%g, inv %g,%g,%g\n",
                pos_min, pos_mean, pos_sd, inv_min, inv_mean, inv_sd);
      }
      *outputs = inv_outputs;
      *inputs = inv_inputs;
    } else if (re_invert) {
      // Inverting was not an improvement, so undo and run again, so the
      // outputs match the best forward result.
      SetRandomSeed();
      network_->Forward(debug, *inputs, NULL, &scratch_space_, outputs);
    }
  }
  pixDestroy(&pix);
  if (debug) {
    GenericVector<int> labels, coords;
    LabelsFromOutputs(*outputs, &labels, &coords);
    DisplayForward(*inputs, labels, coords, "LSTMForward", &debug_win_);
    DebugActivationPath(*outputs, labels, coords);
  }
  return true;
}