Exemple #1
0
int main(int argc, char *argv[])
{
   int i;
   char *s,*c;
   char fmt[256];
   dictList *dEntry,*d;

   InitShell(argc,argv,prog_version,prog_vc_id);
   InitMem();
   InitMath();
   InitWave();
   InitLabel();
   InitDict();
   InitWMap();
   InitLUtil();
   InitLModel();
   InitPCalc();

   if (!InfoPrinted() && NumArgs() == 0)
      ReportUsage();
   if (NumArgs() == 0) Exit(EXIT_SUCCESS);

   SetConfParms();

   CreateHeap(&langHeap,"langHeap",MSTAK,1,0.5,5000,40000);

   for (i=1; i<=LM_NSIZE; i++) cutOff[i] = 0;

   while (NextArg() == SWITCHARG) {
      s = GetSwtArg();
      if (strlen(s)!=1) 
         HError(16919,"Bad switch %s; must be single letter",s);
      switch(s[0]){
         case 'c':
           i = GetChkedInt(2,LM_NSIZE,s); 
	   cutOff[i] = GetChkedInt(1,1000,s);
	   break;
         case 'd':
	   if (NextArg()!=STRINGARG)
	     HError(16919,"LMCopy: Input dictionary file name expected");
	   dEntry=New(&gcheap,sizeof(dictList));
	   dEntry->fname=GetStrArg(); dEntry->next=NULL;
	   if (dList==NULL) dList=dEntry;
	   else {
	     for (d=dList;d->next!=NULL;d=d->next);
	     d->next=dEntry;
	   }
	   break;
         case 'f':
	   strcpy(fmt,GetStrArg());
	   for (c=fmt; *c!=0; *c=toupper(*c), c++);
	    if (strcmp(fmt, LM_TXT_TEXT)==0)
               saveFmt = LMF_TEXT;
	    else if (strcmp(fmt, LM_TXT_BINARY)==0)
               saveFmt = LMF_BINARY;
	    else if (strcmp(fmt, LM_TXT_ULTRA)==0)
               saveFmt = LMF_ULTRA;
	    else
	       HError(16919,"Unrecognised LM format, should be one of [%s, %s, %s]",
		      LM_TXT_TEXT, LM_TXT_BINARY, LM_TXT_ULTRA);
	   break;
	 case 'm':
	   remDup=FALSE;
	   break;
         case 'n':
            nSize = GetChkedInt(1,LM_NSIZE,s); break;
	 case 'o':
	   firstOnly=TRUE;
	   break;
	 case 'u':
	   if (NextArg()!=STRINGARG)
	     HError(16919,"LMCopy: Unigram file name expected");
	   uniFn = GetStrArg();
	   break;
         case 'v':
	   if (NextArg()!=STRINGARG)
	     HError(16919,"LMCopy: Dictionary output file name expected");
	   outDictFn = GetStrArg();
	   break;
         case 'w':
	    if (NextArg() != STRINGARG)
	       HError(16919,"LPlex: Word list file name expected");
	    wlistFN = GetStrArg();
	    break;
	 case 'T':
	    trace = GetChkedInt(0,077, s); break;
         default:
            HError(16919,"LMPlex: Unknown switch %s",s);
      }
   }
   if (NextArg()!=STRINGARG)  /* load the language model */
      HError(16919, "Input language model filename expected");
   srcFN = GetStrArg();

   if (NextArg()!=STRINGARG)  /* load the language model */
      HError(16919, "Output language model filename expected");
   tgtFN= GetStrArg();

   if (wlistFN!=NULL) {
      InitVocab(&vocab);   
      if(ReadDict(wlistFN,&vocab) < SUCCESS) 
	 HError(16913,"Could not read dict in %s", wlistFN);
      if (trace&T_TOP) {
	printf("Loaded %d words from %s\n",vocab.nwords,wlistFN); 
	fflush(stdout);
      }
      voc = &vocab;
      CreateWordList(wlistFN,&wlist,10);
      lm = LoadLangModel(srcFN,&wlist,1.0,LMP_FLOAT|LMP_COUNT,&langHeap);
   } else {
      voc = NULL;
      lm = LoadLangModel(srcFN,NULL,1.0,LMP_FLOAT|LMP_COUNT,&langHeap);
   }
   if (trace&T_TOP) {
     printf("Loaded model from %s\n",srcFN); 
     fflush(stdout);
   }
   if (lm->probType==LMP_COUNT) {
      RebuildLM(lm, cutOff, NULL, LMP_FLOAT); /* GLM there was no threshold before! */
   }
   if (uniFn!=NULL)
      ReplaceUnigrams(uniFn,lm);
   if (nSize>0 && nSize<lm->nSize)
      lm->nSize = nSize;
#ifdef HTK_CRYPT
   if (lm->encrypt && saveFmt==LMF_TEXT)
     saveFmt = LMF_BINARY;
#endif
   for (i=1;i<=lm->nSize;i++)
      lm->gInfo[i].fmt = (i==1) ? LMF_TEXT : saveFmt;
   SaveLangModel(tgtFN,lm);
   if (trace&T_TOP) {
     printf("Wrote model to %s\n",tgtFN); 
     fflush(stdout);
   }
   if (outDictFn) {
      MakeDictionary(outDictFn,dList,voc);
   }

   Exit(EXIT_SUCCESS);
   return EXIT_SUCCESS; /* never reached -- make compiler happy */
}   
Exemple #2
0
int main(int argc, char *argv[])
{
   int i;
   char *s,*c,*e;

   InitShell(argc,argv,lplex_version,lplex_vc_id);
   InitMem();
   InitMath();
   InitWave();
   InitLabel();
   InitWMap();
   InitCMap();
   InitLUtil();
   InitLModel();
   InitPCalc();
   InitPMerge();
   SetConfParms();

   if (!InfoPrinted() && NumArgs() == 0)
      ReportUsage();
   if (NumArgs() == 0) Exit(EXIT_SUCCESS);

   nLModel = 1;
   for (i=1; i<=LM_NSIZE; i++) cutOff[i] = 0, wdThresh[i] = 0.0;
   CreateHeap(&permHeap, "permHeap", MSTAK, 1, 1.0, 4000, 20000);
   CreateHeap(&tempHeap, "tempHeap", MSTAK, 1, 1.0, 8000, 40000);
   while (NextArg() == SWITCHARG) {
      s = GetSwtArg();
      if (strlen(s)!=1)
         HError(16619,"Bad switch %s; must be single letter",s);
      switch(s[0]){
         case 'c':
            i = GetChkedInt(2,LM_NSIZE,s);
	    cutOff[i] = GetChkedInt(1,1000,s);
	    break;
         case 'd':
            i = GetChkedInt(2,LM_NSIZE,s);
	    wdThresh[i] = GetChkedFlt(0.0,1E10,s);
	    break;
         case 'e':
	    if (NextArg() != STRINGARG)
	      HError(16619,"LPlex: Eq Class Name Expected");
	    c = GetStrArg();
	    if (NextArg() != STRINGARG)
	      HError(16619,"LPlex: Eq Label Name Expected");
	    e = GetStrArg();
	    AddEquiv(c,e);
	    break;
	 case 'i':
            if (NextArg()!=FLOATARG)
	       HError(16619,"LPlex: Interpolation weight expected");
	    lmInfo[nLModel].weight = GetChkedFlt(0.0,1.0,s);
            if (NextArg()!=STRINGARG)
	       HError(16619,"LPlex: Interpolation LM filename expected");
	    lmInfo[nLModel].fn = GetStrArg();
	    nLModel++;
	    break;
	 case 'n':
	    testInfo[numTests++] = GetChkedInt(1, 10, s); break;
	 case 'o':
	    printOOV = TRUE; break;
          case 's':
	    if (NextArg() != STRINGARG)
	       HError(16619,"LPlex: Prob Stream file name expected");
	    outStreamFN = GetStrArg();
	    break;
	 case 't':
	    streamMode = TRUE; break;
	 case 'u':
	    skipOOV = FALSE; break;
         case 'w':
	    if (NextArg() != STRINGARG)
	       HError(16619,"LPlex: Word list file name expected");
	    wlistFN = GetStrArg();
	    break;
         case 'z':
	    if (NextArg() != STRINGARG)
	       HError(16619,"LPlex: New null class name expected");
	    nulName = GetStrArg();
	    break;
	 case 'G':
	    if (NextArg() != STRINGARG)
	       HError(16619,"Label File format expected");
	    if((lff = Str2Format(GetStrArg())) == ALIEN)
	       HError(16619,"Warning ALIEN Label file format set");
	    break;
	 case 'I':
	    if (NextArg() != STRINGARG)
	       HError(16619,"MLF file name expected");
	    LoadMasterFile(GetStrArg()); break;
	 case 'T':
	    trace = GetChkedInt(0,077, s); break;
         default:
            HError(16619,"LPlex: Unknown switch %s",s);
      }
   }
#ifdef HTK_TRANSCRIBER
   if (trace&T_PROB) trace=trace^T_PROB;
#endif
   if (NextArg()!=STRINGARG)  /* load the language model */
      HError(16619, "Language model filename expected");
   lmInfo[0].fn = GetStrArg();

   Initialise();
   ProcessFiles();

   Exit(EXIT_SUCCESS);
   return EXIT_SUCCESS; /* never reached -- make compiler happy */
}
Exemple #3
0
int main(int argc, char *argv[])
{
   int i;
   char *s,*c;
   char fmt[256];

   InitShell(argc,argv,lnorm_version,lnorm_vc_id);
   InitMem();
   InitMath();
   InitWave();
   InitLabel();
   InitWMap();
   InitLUtil();
   InitLModel();
   InitPCalc();

   if (!InfoPrinted() && NumArgs() == 0)
      ReportUsage();
   if (NumArgs() == 0) Exit(EXIT_SUCCESS);

   SetConfParms();

   CreateHeap(&langHeap,"langHeap",MSTAK,1,0.5,5000,40000);

   for (i=1; i<=LM_NSIZE; i++) cutOff[i] = 0, wdThresh[i] = 0.0;
   while (NextArg() == SWITCHARG) {
      s = GetSwtArg();
      if (strlen(s)!=1) 
         HError(16519,"Bad switch %s; must be single letter",s);
      switch(s[0]){
         case 'c':
            i = GetChkedInt(2,LM_NSIZE,s); 
	    cutOff[i] = GetChkedInt(1,1000,s);
	    break;
         case 'd':
            i = GetChkedInt(2,LM_NSIZE,s); 
	    wdThresh[i] = GetChkedFlt(0.0,1E10,s);
	    break;
         case 'f':
	   strcpy(fmt,GetStrArg());
	   for (c=fmt; *c!=0; *c=toupper(*c),c++);
           if (strcmp(fmt, LM_TXT_TEXT)==0)
              saveFmt = LMF_TEXT;
           else if (strcmp(fmt, LM_TXT_BINARY)==0)
              saveFmt = LMF_BINARY;
           else if (strcmp(fmt, LM_TXT_ULTRA)==0)
              saveFmt = LMF_ULTRA;
	   else
              HError(16519,"Unrecognised LM format, should be one of [%s, %s, %s]",
                     LM_TXT_TEXT, LM_TXT_BINARY, LM_TXT_ULTRA);
	   break;
         case 'n':
            nSize = GetChkedInt(1,LM_NSIZE,s); break;
         case 'w':
	    if (NextArg() != STRINGARG)
	       HError(16519,"LPlex: Word list file name expected");
	    wlistFN = GetStrArg();
	    break;
	 case 'T':
	    trace = GetChkedInt(0,077, s); break;
         default:
            HError(16519,"LMPlex: Unknown switch %s",s);
      }
   }
   if (NextArg()!=STRINGARG)  /* load the language model */
      HError(16519, "Input language model filename expected");
   srcFN = GetStrArg();

   if (NextArg()!=STRINGARG)  /* load the language model */
      HError(16519, "Output language model filename expected");
   tgtFN= GetStrArg();

   if (wlistFN!=NULL) {
      CreateWordList(wlistFN,&wlist,10);
      lm = LoadLangModel(srcFN,&wlist,1.0,LMP_FLOAT|LMP_COUNT,&langHeap);
   } else {
      lm = LoadLangModel(srcFN,NULL,1.0,LMP_FLOAT|LMP_COUNT,&langHeap);
   }
   if (lm->probType==LMP_COUNT) {
      RebuildLM(lm,cutOff,wdThresh,LMP_FLOAT);
   } else {
      NormaliseLM(lm);
   }
   if (nSize>0 && nSize<lm->nSize)
      lm->nSize = nSize;
   for (i=1;i<=lm->nSize;i++)
      lm->gInfo[i].fmt = (i==1) ? LMF_TEXT : saveFmt;
   SaveLangModel(tgtFN,lm);

   Exit(EXIT_SUCCESS);
   return EXIT_SUCCESS; /* never reached -- make compiler happy */
}