int main(int argc, char *argv[]) { char * labFn, *listfn, *s; int i,fidx; MLFEntry *me = NULL; Transcription *t; void InitStats(char *listfn); void GatherStats(Transcription *t); void OutputStats(void); if(InitShell(argc,argv,hlstats_version,hlstats_vc_id)<SUCCESS) HError(1300,"HLStats: InitShell failed"); InitMem(); InitMath(); InitWave(); InitLabel(); InitLM(); if (!InfoPrinted() && NumArgs() == 0) ReportUsage(); if (NumArgs() == 0) Exit(0); SetConfParms(); enterId=GetLabId("!ENTER",TRUE); /* All sentences should or are coerced */ exitId=GetLabId("!EXIT",TRUE); /* to start enterId and end exitId */ nullId=GetLabId("!NULL",TRUE); /* Name for words not in list */ while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(1319,"HLStats: Bad switch %s; must be single letter",s); switch(s[0]){ case 'b': doBigram = TRUE; if (NextArg() != STRINGARG) HError(1319,"HLStats: Ngram output file name expected"); bigFile = GetStrArg(); break; case 'c': doLCount = TRUE; lCountLimit = GetChkedInt(0,100000,s); break; case 'd': doDurs = TRUE; break; case 'f': bigFloor = GetChkedFlt(0.0,1000.0,s); break; case 'h': hSize = GetChkedInt(1,2,s); break; case 'l': doList = TRUE; if (NextArg() != STRINGARG) HError(1319,"HLStats: Output label list file name expected"); listFile = GetStrArg(); break; case 'o': doBOff = TRUE; break; case 'p': doPCount = TRUE; pCountLimit = GetChkedInt(0,100000,s); break; case 's': if (NextArg() != STRINGARG) HError(1319,"HLStats: ENTER label name expected"); enterId=GetLabId(GetStrArg(),TRUE); if (NextArg() != STRINGARG) HError(1319,"HLStats: EXIT label name expected"); exitId=GetLabId(GetStrArg(),TRUE); break; case 't': bigThresh = GetChkedInt(0,100,s); break; case 'u': uniFloor = GetChkedFlt(0.0,1000.0,s); break; case 'G': if (NextArg() != STRINGARG) HError(1319,"HLStats: Input label File format expected"); if((ff = Str2Format(GetStrArg())) == ALIEN) HError(-1389,"HLStats: Warning ALIEN Label file format set"); break; case 'I': if (NextArg() != STRINGARG) HError(1319,"HLStats: Input MLF file name expected"); LoadMasterFile(GetStrArg()); break; case 'T': if (NextArg() != INTARG) HError(1319,"HLStats: Trace value expected"); trace = GetChkedInt(0,017,s); break; default: HError(1319,"HLStats: Unknown switch %s",s); } } if (NextArg()!=STRINGARG) HError(1319,"HLStats: Label list file name expected"); listfn = GetStrArg(); if (!(doDurs || doBigram || doList || doLCount || doPCount)) HError(1330,"HLStats: Nothing to do!"); InitStats(listfn); i=0; while (NumArgs()>0) { if (NextArg()!=STRINGARG) HError(1319,"HLStats: Input label file name expected"); labFn = GetStrArg(); if (IsMLFFile(labFn)) { fidx = NumMLFFiles(); if ((me=GetMLFTable()) != NULL) { while(me->next != NULL) me=me->next; LoadMasterFile(labFn); me=me->next; } else { LoadMasterFile(labFn); me=GetMLFTable(); } while (me != NULL) { if (me->type == MLF_IMMEDIATE && me->def.immed.fidx == fidx) { if (trace&T_FIL) { printf(" Processing file %s\n",me->pattern); fflush(stdout); } t = LOpen(&tmpHeap,me->pattern,ff); if (t->numLists<1) HError(-1330,"HLStats: Empty file %s",me->pattern); else GatherStats(t),i++; Dispose(&tmpHeap,t); } me = me->next; if ((trace&T_BAS) && !(trace&T_FIL) && NumMLFEntries()>5000 && i%1000==0) printf(". "),fflush(stdout); } if ((trace&T_BAS) && !(trace&T_FIL) && NumMLFEntries()>5000) printf("\n"); } else { if (trace&T_FIL) { printf(" Processing file %s\n",labFn); fflush(stdout); } t = LOpen(&tmpHeap,labFn,ff); if (t->numLists<1) HError(-1330,"HLStats: Empty file %s",me->pattern); else GatherStats(t),i++; Dispose(&tmpHeap,t); } } if (trace&T_MEM) PrintAllHeapStats(); OutputStats(); if (trace&T_MEM) PrintAllHeapStats(); Exit(0); return (0); /* never reached -- make compiler happy */ }
// Recognizes the image_data, returning the labels, // scores, and corresponding pairs of start, end x-coords in coords. bool LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert, bool debug, bool re_invert, float* scale_factor, NetworkIO* inputs, NetworkIO* outputs) { // Maximum width of image to train on. const int kMaxImageWidth = 2560; // This ensures consistent recognition results. SetRandomSeed(); int min_width = network_->XScaleFactor(); Pix* pix = Input::PrepareLSTMInputs(image_data, network_, min_width, &randomizer_, scale_factor); if (pix == NULL) { tprintf("Line cannot be recognized!!\n"); return false; } if (network_->IsTraining() && pixGetWidth(pix) > kMaxImageWidth) { tprintf("Image too large to learn!! Size = %dx%d\n", pixGetWidth(pix), pixGetHeight(pix)); pixDestroy(&pix); return false; } // Reduction factor from image to coords. *scale_factor = min_width / *scale_factor; inputs->set_int_mode(IsIntMode()); SetRandomSeed(); Input::PreparePixInput(network_->InputShape(), pix, &randomizer_, inputs); network_->Forward(debug, *inputs, NULL, &scratch_space_, outputs); // Check for auto inversion. float pos_min, pos_mean, pos_sd; OutputStats(*outputs, &pos_min, &pos_mean, &pos_sd); if (invert && pos_min < 0.5) { // Run again inverted and see if it is any better. NetworkIO inv_inputs, inv_outputs; inv_inputs.set_int_mode(IsIntMode()); SetRandomSeed(); pixInvert(pix, pix); Input::PreparePixInput(network_->InputShape(), pix, &randomizer_, &inv_inputs); network_->Forward(debug, inv_inputs, NULL, &scratch_space_, &inv_outputs); float inv_min, inv_mean, inv_sd; OutputStats(inv_outputs, &inv_min, &inv_mean, &inv_sd); if (inv_min > pos_min && inv_mean > pos_mean && inv_sd < pos_sd) { // Inverted did better. Use inverted data. if (debug) { tprintf("Inverting image: old min=%g, mean=%g, sd=%g, inv %g,%g,%g\n", pos_min, pos_mean, pos_sd, inv_min, inv_mean, inv_sd); } *outputs = inv_outputs; *inputs = inv_inputs; } else if (re_invert) { // Inverting was not an improvement, so undo and run again, so the // outputs match the best forward result. SetRandomSeed(); network_->Forward(debug, *inputs, NULL, &scratch_space_, outputs); } } pixDestroy(&pix); if (debug) { GenericVector<int> labels, coords; LabelsFromOutputs(*outputs, &labels, &coords); DisplayForward(*inputs, labels, coords, "LSTMForward", &debug_win_); DebugActivationPath(*outputs, labels, coords); } return true; }