Example #1
0


/* File-scope state of the tool. Everything here is static, so it is
   visible only inside this translation unit. Objects without an explicit
   initialiser start out zeroed, as all static storage does in C. */
static int       nSize = 0;              /* output ngram size; 0 until set */

static WordMap   wList;                  /* the word list */

static int       nLModel;                /* number of loaded LMs */

static LMInfo    lmInfo[MAX_LMODEL];     /* array of loaded LMs; capacity is MAX_LMODEL entries */

static BackOffLM *tgtLM;                 /* target lm */

static char      *tgtFN;                 /* output model name */

static MemHeap   langHeap;               /* Stores global stats */

static LMFileFmt saveFmt = DEF_SAVEFMT;  /* LM file format; starts as DEF_SAVEFMT */



/* ---------------- Configuration Parameters --------------------- */



/* Configuration parameter table; filled by the GetConfig call in
   SetConfParms, which can store at most MAXGLOBS entries here. */
static ConfParam *cParm[MAXGLOBS];

static int nParm = 0;                  /* total num params; set from GetConfig's return value */





/* ---------------- Function prototypes -------------------------- */



/* Forward declaration only: takes no arguments and returns nothing.
   The definition lies outside this part of the file. */
void Initialise(void);





/* ---------------- Process Command Line ------------------------- */



/* SetConfParms: set conf parms relevant to this tool */

void SetConfParms(void)

{

   int i;



   nParm = GetConfig("LMERGE", TRUE, cParm, MAXGLOBS);

   if (nParm>0){

      if (GetConfInt(cParm,nParm,"TRACE",&i)) trace = i;

   }

}



char *ReturnLMName(int fmt)

{

   switch(fmt) {

      case LMF_TEXT:

	 return LM_TXT_TEXT;

      case LMF_BINARY:

	 return LM_TXT_BINARY;

      case LMF_ULTRA:

	 return LM_TXT_ULTRA;

      default:

	 return LM_TXT_OTHER;
Example #2
0
int main(int argc, char *argv[])
{
   int i;
   char *c,*s,*fn;
   char sBuf[256],fmt[256];

   void       Initialise(void);
   void       ProcessText(char *fn,bool lastFile);
   bool    Exists(char *fn);
   BackOffLM *CombineModels(MemHeap *heap,LMInfo *lmi,int nLModel,int nSize,WordMap *wl) ;

   InitShell(argc,argv,ladapt_version,ladapt_vc_id);
   InitMem();
   InitMath();
   InitWave();
   InitLabel();
   InitLUtil();
   InitWMap();
   InitGBase();
   InitLModel();
   InitPCalc();
   InitPMerge();

   SetConfParms();

   if (!InfoPrinted() && NumArgs() == 0)
      ReportUsage();
   if (NumArgs() == 0) Exit(EXIT_SUCCESS);

   InitBuildInfo(&binfo); 
   binfo.dctype = DC_ABSOLUTE;
   nLModel = 1;
   while (NextArg() == SWITCHARG) {
      s = GetSwtArg();
      if (strlen(s)!=1) 
         HError(16419,"Bad switch %s; must be single letter",s);
      switch(s[0]){
         case 'a':
            newWords = GetChkedInt(10,10000000,s); break;
         case 'b':
            ngbSize = GetChkedInt(10,10000000,s); break;
         case 'c':
            i = GetChkedInt(2,LM_NSIZE,s); 
	    binfo.cutOff[i] = GetChkedInt(0,1000,s);
	    break;
         case 'd':
            if (NextArg()!=STRINGARG)
               HError(16419,"Gram base root file name expected");
            rootFN = GetStrArg(); 
	    break;
         case 'f':
	    strcpy(fmt, GetStrArg());
	    for (c=fmt; *c; *c=toupper(*c), c++); /* To uppercase */
	    if (strcmp(fmt, LM_TXT_TEXT)==0)
	      binfo.saveFmt = LMF_TEXT;
	    else if (strcmp(fmt, LM_TXT_BINARY)==0)
	       binfo.saveFmt = LMF_BINARY;
	    else if (strcmp(fmt, LM_TXT_ULTRA)==0)
	       binfo.saveFmt = LMF_ULTRA;
	    else
	       HError(16419,"Unrecognised LM format, should be one of [%s, %s, %s]",
		      LM_TXT_TEXT, LM_TXT_BINARY, LM_TXT_ULTRA);
	    break;
         case 'g':
            processText = FALSE; break;
	 case 'i':
            if (NextArg()!=FLOATARG)
	       HError(16419,"Interpolation weight expected");
	    lmInfo[nLModel].weight = GetChkedFlt(0.0,1.0,s);
            if (NextArg()!=STRINGARG)
	       HError(16419,"Interpolation LM filename expected");
	    lmInfo[nLModel].fn = GetStrArg();
	    nLModel++;
	    break;
         case 'j':
            i = GetChkedInt(2,LM_NSIZE,s); 
	    binfo.wdThresh[i] = GetChkedFlt(0.0,1E10,s);
	    break;
         case 'n':
            nSize = GetChkedInt(1, MAXNG, s); break;
#ifdef HTK_TRANSCRIBER
         case 's':
            if (NextArg()!=STRINGARG)
               HError(16419,"Gram file text source descriptor expected");
            txtSrc = GetStrArg(); break;
         case 't':
	    binfo.dctype = DC_KATZ; break;
#endif
         case 'w':
            if (NextArg()!=STRINGARG)
               HError(16419,"Word list file name expected");
            wlistFN = GetStrArg(); break;
#ifndef HTK_TRANSCRIBER
         case 'x':
            binfo.ptype = LMP_COUNT; break;
#endif
         case 'T':
            trace = GetChkedInt(0,077,s); break;
         default:
            HError(16419,"LAdapt: Unknown switch %s",s);
      }
   }
#ifdef HTK_TRANSCRIBER
   if (nLModel==1) {  /* must interpolate with at least one model */
      HError(16419,"LAdapt: at least one model must be specified with -i option");
   }
   if (binfo.saveFmt==LMF_TEXT) { /* save fomat cannot be TEXT */ 
      binfo.saveFmt=LMF_BINARY;
   }
#endif
   if (NextArg() != STRINGARG)
      HError(16419,"LAdapt: language model file name expected");
   outFN = CopyString(&gstack,GetStrArg());

   Initialise();
   if (processText) {
      if (NextArg() != STRINGARG)
	 ProcessText(NULL,TRUE);       /* input from stdin */
      else
	 while (NextArg() == STRINGARG) {
	    /* !! copy string argument since it gets overwritten 
	       by NextArg() when reading from script file */
	    fn = CopyString(&gstack,GetStrArg());
	    ProcessText(fn,NextArg() != STRINGARG);
	 }
      if (NumArgs() != 0)
	 HError(-16419,"LAdapt: unused args left on cmd line");
      for (i=0; i<stdBuf.ngb->fndx; i++) {
	 sprintf(sBuf,"%s.%d",stdBuf.ngb->fn,i);  
	 AddInputGFile(&inSet,sBuf,1.0);
      }
      ResetHeap(&langHeap);
   } else {
      for (i=0; i<MAX_NGRAM_FILES; i++) {
	 sprintf(sBuf,"%s.%d",rootFN,i);
	 if (!Exists(sBuf))
	    break;
	 AddInputGFile(&inSet,sBuf,1.0);
      }
      if (i==MAX_NGRAM_FILES)
      {
	HError(-16419, "LAdapt: Only %d n-gram files read (recompile with different setting\nof MAX_NGRAM_FILES");
      }
   }
   if (nLModel==1) {
      adpLM = GenerateModel(&langHeap,&binfo);
   } else {
      if (binfo.ptype==LMP_COUNT) 
	 binfo.ptype = LMP_FLOAT;
      newLM = GenerateModel(&langHeap,&binfo);
      lmInfo[0].lm = newLM;
      lmInfo[0].fn = "unknown";
      /* combine all models into one */
      adpLM = CombineModels(&langHeap,lmInfo,nLModel,nSize,tgtVoc);
   }
#ifdef HTK_TRANSCRIBER
#ifdef HTK_CRYPT
   adpLM->encrypt = TRUE;     /* force to write encrypted model */
#endif
#endif
   SaveLangModel(outFN,adpLM);

   Exit(EXIT_SUCCESS);
   return EXIT_SUCCESS; /* never reached -- make compiler happy */
}
Example #3
0
int main(int argc, char *argv[])
{
   int i;
   char *s,*c,*e;

   InitShell(argc,argv,lplex_version,lplex_vc_id);
   InitMem();
   InitMath();
   InitWave();
   InitLabel();
   InitWMap();
   InitCMap();
   InitLUtil();
   InitLModel();
   InitPCalc();
   InitPMerge();
   SetConfParms();

   if (!InfoPrinted() && NumArgs() == 0)
      ReportUsage();
   if (NumArgs() == 0) Exit(EXIT_SUCCESS);

   nLModel = 1;
   for (i=1; i<=LM_NSIZE; i++) cutOff[i] = 0, wdThresh[i] = 0.0;
   CreateHeap(&permHeap, "permHeap", MSTAK, 1, 1.0, 4000, 20000);
   CreateHeap(&tempHeap, "tempHeap", MSTAK, 1, 1.0, 8000, 40000);
   while (NextArg() == SWITCHARG) {
      s = GetSwtArg();
      if (strlen(s)!=1)
         HError(16619,"Bad switch %s; must be single letter",s);
      switch(s[0]){
         case 'c':
            i = GetChkedInt(2,LM_NSIZE,s);
	    cutOff[i] = GetChkedInt(1,1000,s);
	    break;
         case 'd':
            i = GetChkedInt(2,LM_NSIZE,s);
	    wdThresh[i] = GetChkedFlt(0.0,1E10,s);
	    break;
         case 'e':
	    if (NextArg() != STRINGARG)
	      HError(16619,"LPlex: Eq Class Name Expected");
	    c = GetStrArg();
	    if (NextArg() != STRINGARG)
	      HError(16619,"LPlex: Eq Label Name Expected");
	    e = GetStrArg();
	    AddEquiv(c,e);
	    break;
	 case 'i':
            if (NextArg()!=FLOATARG)
	       HError(16619,"LPlex: Interpolation weight expected");
	    lmInfo[nLModel].weight = GetChkedFlt(0.0,1.0,s);
            if (NextArg()!=STRINGARG)
	       HError(16619,"LPlex: Interpolation LM filename expected");
	    lmInfo[nLModel].fn = GetStrArg();
	    nLModel++;
	    break;
	 case 'n':
	    testInfo[numTests++] = GetChkedInt(1, 10, s); break;
	 case 'o':
	    printOOV = TRUE; break;
          case 's':
	    if (NextArg() != STRINGARG)
	       HError(16619,"LPlex: Prob Stream file name expected");
	    outStreamFN = GetStrArg();
	    break;
	 case 't':
	    streamMode = TRUE; break;
	 case 'u':
	    skipOOV = FALSE; break;
         case 'w':
	    if (NextArg() != STRINGARG)
	       HError(16619,"LPlex: Word list file name expected");
	    wlistFN = GetStrArg();
	    break;
         case 'z':
	    if (NextArg() != STRINGARG)
	       HError(16619,"LPlex: New null class name expected");
	    nulName = GetStrArg();
	    break;
	 case 'G':
	    if (NextArg() != STRINGARG)
	       HError(16619,"Label File format expected");
	    if((lff = Str2Format(GetStrArg())) == ALIEN)
	       HError(16619,"Warning ALIEN Label file format set");
	    break;
	 case 'I':
	    if (NextArg() != STRINGARG)
	       HError(16619,"MLF file name expected");
	    LoadMasterFile(GetStrArg()); break;
	 case 'T':
	    trace = GetChkedInt(0,077, s); break;
         default:
            HError(16619,"LPlex: Unknown switch %s",s);
      }
   }
#ifdef HTK_TRANSCRIBER
   if (trace&T_PROB) trace=trace^T_PROB;
#endif
   if (NextArg()!=STRINGARG)  /* load the language model */
      HError(16619, "Language model filename expected");
   lmInfo[0].fn = GetStrArg();

   Initialise();
   ProcessFiles();

   Exit(EXIT_SUCCESS);
   return EXIT_SUCCESS; /* never reached -- make compiler happy */
}