static int nSize = 0; /* output ngram size */ static WordMap wList; /* the word list */ static int nLModel; /* number of loaded LMs */ static LMInfo lmInfo[MAX_LMODEL]; /* array of loaded LMs */ static BackOffLM *tgtLM; /* target lm */ static char *tgtFN; /* output model name */ static MemHeap langHeap; /* Stores global stats */ static LMFileFmt saveFmt = DEF_SAVEFMT; /* LM file format */ /* ---------------- Configuration Parameters --------------------- */ static ConfParam *cParm[MAXGLOBS]; static int nParm = 0; /* total num params */ /* ---------------- Function prototypes -------------------------- */ void Initialise(void); /* ---------------- Process Command Line ------------------------- */ /* SetConfParms: set conf parms relevant to this tool */ void SetConfParms(void) { int i; nParm = GetConfig("LMERGE", TRUE, cParm, MAXGLOBS); if (nParm>0){ if (GetConfInt(cParm,nParm,"TRACE",&i)) trace = i; } } char *ReturnLMName(int fmt) { switch(fmt) { case LMF_TEXT: return LM_TXT_TEXT; case LMF_BINARY: return LM_TXT_BINARY; case LMF_ULTRA: return LM_TXT_ULTRA; default: return LM_TXT_OTHER;
int main(int argc, char *argv[]) { int i; char *c,*s,*fn; char sBuf[256],fmt[256]; void Initialise(void); void ProcessText(char *fn,bool lastFile); bool Exists(char *fn); BackOffLM *CombineModels(MemHeap *heap,LMInfo *lmi,int nLModel,int nSize,WordMap *wl) ; InitShell(argc,argv,ladapt_version,ladapt_vc_id); InitMem(); InitMath(); InitWave(); InitLabel(); InitLUtil(); InitWMap(); InitGBase(); InitLModel(); InitPCalc(); InitPMerge(); SetConfParms(); if (!InfoPrinted() && NumArgs() == 0) ReportUsage(); if (NumArgs() == 0) Exit(EXIT_SUCCESS); InitBuildInfo(&binfo); binfo.dctype = DC_ABSOLUTE; nLModel = 1; while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(16419,"Bad switch %s; must be single letter",s); switch(s[0]){ case 'a': newWords = GetChkedInt(10,10000000,s); break; case 'b': ngbSize = GetChkedInt(10,10000000,s); break; case 'c': i = GetChkedInt(2,LM_NSIZE,s); binfo.cutOff[i] = GetChkedInt(0,1000,s); break; case 'd': if (NextArg()!=STRINGARG) HError(16419,"Gram base root file name expected"); rootFN = GetStrArg(); break; case 'f': strcpy(fmt, GetStrArg()); for (c=fmt; *c; *c=toupper(*c), c++); /* To uppercase */ if (strcmp(fmt, LM_TXT_TEXT)==0) binfo.saveFmt = LMF_TEXT; else if (strcmp(fmt, LM_TXT_BINARY)==0) binfo.saveFmt = LMF_BINARY; else if (strcmp(fmt, LM_TXT_ULTRA)==0) binfo.saveFmt = LMF_ULTRA; else HError(16419,"Unrecognised LM format, should be one of [%s, %s, %s]", LM_TXT_TEXT, LM_TXT_BINARY, LM_TXT_ULTRA); break; case 'g': processText = FALSE; break; case 'i': if (NextArg()!=FLOATARG) HError(16419,"Interpolation weight expected"); lmInfo[nLModel].weight = GetChkedFlt(0.0,1.0,s); if (NextArg()!=STRINGARG) HError(16419,"Interpolation LM filename expected"); lmInfo[nLModel].fn = GetStrArg(); nLModel++; break; case 'j': i = GetChkedInt(2,LM_NSIZE,s); binfo.wdThresh[i] = GetChkedFlt(0.0,1E10,s); break; case 'n': nSize = GetChkedInt(1, MAXNG, s); break; #ifdef HTK_TRANSCRIBER case 's': if (NextArg()!=STRINGARG) HError(16419,"Gram file text source descriptor expected"); txtSrc = GetStrArg(); break; case 't': binfo.dctype = DC_KATZ; break; #endif case 'w': if (NextArg()!=STRINGARG) HError(16419,"Word list file name expected"); wlistFN = GetStrArg(); break; #ifndef HTK_TRANSCRIBER case 'x': binfo.ptype = LMP_COUNT; break; #endif case 'T': trace = GetChkedInt(0,077,s); break; default: HError(16419,"LAdapt: Unknown switch %s",s); } } #ifdef HTK_TRANSCRIBER if (nLModel==1) { /* must interpolate with at least one model */ HError(16419,"LAdapt: at least one model must be specified with -i option"); } if (binfo.saveFmt==LMF_TEXT) { /* save fomat cannot be TEXT */ binfo.saveFmt=LMF_BINARY; } #endif if (NextArg() != STRINGARG) HError(16419,"LAdapt: language model file name expected"); outFN = CopyString(&gstack,GetStrArg()); Initialise(); if (processText) { if (NextArg() != STRINGARG) ProcessText(NULL,TRUE); /* input from stdin */ else while (NextArg() == STRINGARG) { /* !! copy string argument since it gets overwritten by NextArg() when reading from script file */ fn = CopyString(&gstack,GetStrArg()); ProcessText(fn,NextArg() != STRINGARG); } if (NumArgs() != 0) HError(-16419,"LAdapt: unused args left on cmd line"); for (i=0; i<stdBuf.ngb->fndx; i++) { sprintf(sBuf,"%s.%d",stdBuf.ngb->fn,i); AddInputGFile(&inSet,sBuf,1.0); } ResetHeap(&langHeap); } else { for (i=0; i<MAX_NGRAM_FILES; i++) { sprintf(sBuf,"%s.%d",rootFN,i); if (!Exists(sBuf)) break; AddInputGFile(&inSet,sBuf,1.0); } if (i==MAX_NGRAM_FILES) { HError(-16419, "LAdapt: Only %d n-gram files read (recompile with different setting\nof MAX_NGRAM_FILES"); } } if (nLModel==1) { adpLM = GenerateModel(&langHeap,&binfo); } else { if (binfo.ptype==LMP_COUNT) binfo.ptype = LMP_FLOAT; newLM = GenerateModel(&langHeap,&binfo); lmInfo[0].lm = newLM; lmInfo[0].fn = "unknown"; /* combine all models into one */ adpLM = CombineModels(&langHeap,lmInfo,nLModel,nSize,tgtVoc); } #ifdef HTK_TRANSCRIBER #ifdef HTK_CRYPT adpLM->encrypt = TRUE; /* force to write encrypted model */ #endif #endif SaveLangModel(outFN,adpLM); Exit(EXIT_SUCCESS); return EXIT_SUCCESS; /* never reached -- make compiler happy */ }
int main(int argc, char *argv[]) { int i; char *s,*c,*e; InitShell(argc,argv,lplex_version,lplex_vc_id); InitMem(); InitMath(); InitWave(); InitLabel(); InitWMap(); InitCMap(); InitLUtil(); InitLModel(); InitPCalc(); InitPMerge(); SetConfParms(); if (!InfoPrinted() && NumArgs() == 0) ReportUsage(); if (NumArgs() == 0) Exit(EXIT_SUCCESS); nLModel = 1; for (i=1; i<=LM_NSIZE; i++) cutOff[i] = 0, wdThresh[i] = 0.0; CreateHeap(&permHeap, "permHeap", MSTAK, 1, 1.0, 4000, 20000); CreateHeap(&tempHeap, "tempHeap", MSTAK, 1, 1.0, 8000, 40000); while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(16619,"Bad switch %s; must be single letter",s); switch(s[0]){ case 'c': i = GetChkedInt(2,LM_NSIZE,s); cutOff[i] = GetChkedInt(1,1000,s); break; case 'd': i = GetChkedInt(2,LM_NSIZE,s); wdThresh[i] = GetChkedFlt(0.0,1E10,s); break; case 'e': if (NextArg() != STRINGARG) HError(16619,"LPlex: Eq Class Name Expected"); c = GetStrArg(); if (NextArg() != STRINGARG) HError(16619,"LPlex: Eq Label Name Expected"); e = GetStrArg(); AddEquiv(c,e); break; case 'i': if (NextArg()!=FLOATARG) HError(16619,"LPlex: Interpolation weight expected"); lmInfo[nLModel].weight = GetChkedFlt(0.0,1.0,s); if (NextArg()!=STRINGARG) HError(16619,"LPlex: Interpolation LM filename expected"); lmInfo[nLModel].fn = GetStrArg(); nLModel++; break; case 'n': testInfo[numTests++] = GetChkedInt(1, 10, s); break; case 'o': printOOV = TRUE; break; case 's': if (NextArg() != STRINGARG) HError(16619,"LPlex: Prob Stream file name expected"); outStreamFN = GetStrArg(); break; case 't': streamMode = TRUE; break; case 'u': skipOOV = FALSE; break; case 'w': if (NextArg() != STRINGARG) HError(16619,"LPlex: Word list file name expected"); wlistFN = GetStrArg(); break; case 'z': if (NextArg() != STRINGARG) HError(16619,"LPlex: New null class name expected"); nulName = GetStrArg(); break; case 'G': if (NextArg() != STRINGARG) HError(16619,"Label File format expected"); if((lff = Str2Format(GetStrArg())) == ALIEN) HError(16619,"Warning ALIEN Label file format set"); break; case 'I': if (NextArg() != STRINGARG) HError(16619,"MLF file name expected"); LoadMasterFile(GetStrArg()); break; case 'T': trace = GetChkedInt(0,077, s); break; default: HError(16619,"LPlex: Unknown switch %s",s); } } #ifdef HTK_TRANSCRIBER if (trace&T_PROB) trace=trace^T_PROB; #endif if (NextArg()!=STRINGARG) /* load the language model */ HError(16619, "Language model filename expected"); lmInfo[0].fn = GetStrArg(); Initialise(); ProcessFiles(); Exit(EXIT_SUCCESS); return EXIT_SUCCESS; /* never reached -- make compiler happy */ }