示例#1
0
/* main: HNetTest driver.

   Command line:  [-H mmfFile]... dictFile wordNetFile hmmListFile

   Creates the model heap and HMM set, processes the option switches,
   reads the three mandatory file names into dictFn, wdNetFn and
   hmmListFn, loads the HMM set and the dictionary and finally calls
   LoadNetwork.  Every failure is reported through HError. */
int main(int argc, char *argv[])
{
   char *s;

   Initialise(argc,argv);

   CreateHeap(&modelHeap, "Model heap",  MSTAK, 1, 0.0, 100000, 800000 );
   CreateHMMSet(&hset,&modelHeap,TRUE);

   /* Option switches: -H is the only one recognised */
   while (NextArg() == SWITCHARG) {
      s = GetSwtArg();
      if (strlen(s)!=1)
         HError(3219,"HNetTest: Bad switch %s; must be single letter",s);
      switch(s[0]){
      case 'H':
         if (NextArg() != STRINGARG)
            HError(3219,"HNetTest: MMF File name expected");
         AddMMF(&hset,GetStrArg());
         break;
      default:
         HError(3219,"HNetTest: Unknown switch %s",s);
      }
   }

   /* Mandatory positional arguments, in this order */
   if (NextArg()!=STRINGARG)
      HError(999,"HNetTest: Dictionary file name expected");
   dictFn = GetStrArg();
   if (NextArg()!=STRINGARG)
      HError(999,"HNetTest: Word Net file name expected");
   wdNetFn = GetStrArg();
   if (NextArg()!=STRINGARG)
      HError(999,"HNetTest: HMM list  file name expected");
   hmmListFn = GetStrArg();

   /* Load the models named in the HMM list */
   if(MakeHMMSet(&hset,hmmListFn)<SUCCESS)
      HError(999,"HNetTest: MakeHMMSet failed");
   if(LoadHMMSet(&hset,NULL,NULL)<SUCCESS)
      HError(999,"HNetTest: LoadHMMSet failed");

   /* Load the dictionary */
   InitVocab(&vocab);
   if(ReadDict(dictFn,&vocab)<SUCCESS)
      HError(3213, "HNetTest: ReadDict failed");

   LoadNetwork();

   /* main is declared int: return an explicit status instead of
      falling off the end of the function */
   return 0;
}
示例#2
0
/* GenSentences: top level control of the sentence generator.

   latfn - lattice file, opened through FOpen with NetFilter
   dicfn - dictionary file, read into the global voc

   Loads the dictionary and the lattice (stored in the global lat),
   calls GenSent once for each of the ngen sentences and, when stats
   is set, prints entropy, perplexity and sentence length statistics.
   Load failures are reported through HError. */
void  GenSentences(char * latfn, char * dicfn)
{
   int i,min,max,len;
   double e,p;
   MemHeap lheap;
   FILE *f;
   Boolean isPipe;

   InitVocab(&voc);
   if(ReadDict(dicfn,&voc)<SUCCESS)
      HError(3413,"GenSentences: ReadDict failed" );
   CreateHeap(&lheap,"Lattice Heap",MSTAK,1,0.4,1000,5000);
   if ((f=FOpen(latfn,NetFilter,&isPipe)) == NULL)
      HError(3410,"GenSentences: Can't open lattice file %s",latfn);
   if((lat = ReadLattice(f, &lheap, &voc, TRUE, FALSE))==NULL)
      HError(3410,"GenSentences: ReadLattice failed");
   FClose(f,isPipe);

   if (trace&T_TOP)
      printf("HSGen %d sents from lattice %s/dictionary %s\n",
             ngen,latfn,dicfn);
   psSum = 0.0; lenSum = 0; min = 100000; max = 0;
   if (trace&T_DET) quiet = TRUE;  /* kill output if detailed trace */
   for (i=1; i<=ngen; i++){
      len = GenSent(i);
      lenSum += len;
      if (len>max) max = len;
      if (len<min) min = len;
   }
   if (stats)  {
      ComputeVSize();
      /* psSum is presumably accumulated by GenSent - confirm there.
         Entropy is psSum per generated word, so it is only defined
         when at least one word was generated. */
      if (lenSum > 0) {
         e = psSum / lenSum;
         p = exp(e);
         e = e / log(2.0);      /* convert from nats to bits */
         printf("Entropy = %f,  Perplexity = %f\n",e,p);
      } else
         printf("No words generated: entropy and perplexity undefined\n");
      /* the average length and the min/max trackers are meaningless
         when no sentence was requested */
      if (ngen > 0)
         printf("%d Sentences: average len = %.1f, min=%d, max=%d\n",
                ngen,(float)lenSum/ngen,min,max);
   }
}
示例#3
0
/* MakeDictionary: merge one or more dictionaries into a single one

   fn    - name of the dictionary file to write
   dicts - linked list of source dictionary file names
   wlist - optional word list; when non-NULL only words found in it
           are copied and every word in it must end up defined

   Each source dictionary is read into a temporary vocabulary and its
   words (except the null and sub-lattice words) are copied into the
   merged dictionary.  When firstOnly is set a word already present in
   the merged dictionary is not extended by later dictionaries; when
   remDup is set a pronunciation identical to one already stored for
   the word (same phones and same probability) is skipped.  Failures
   are reported through HError. */
void MakeDictionary(char *fn,dictList *dicts,Vocab *wlist)
{
   Word word,fnd,cur;
   Pron pron,chk;
   Vocab tDict,dict;
   dictList *d;
   LabId blank=GetLabId("",TRUE);
   int i,l,n,m,p;

   /* Read dictionary collection */
   InitVocab(&dict);
   for (d=dicts;d!=NULL;d=d->next) {
      InitVocab(&tDict);
      if(ReadDict(d->fname,&tDict)<SUCCESS)
         HError(16913,"Could not read dict in %s", d->fname);
      if (trace&T_TOP) {
         printf("Loaded %d words from %s\n",tDict.nwords,d->fname);
         fflush(stdout);
      }
#ifdef HTK_CRYPT
      if (tDict.encrypt)
         dict.encrypt=TRUE;
#endif
      DumpPhoneTable(&tDict,&dict);
      /* n = words copied, m = copied words without any pronunciation,
         p = pronunciations copied (all per source dictionary) */
      for (i=0,n=0,m=0,p=0; i<VHASHSIZE; i++) {
         for (word=tDict.wtab[i]; word!=NULL; word=word->next) {
            if (word!=tDict.nullWord && word!=tDict.subLatWord) {
               if (wlist==NULL) fnd=word;
               else fnd=GetWord(wlist,word->wordName,FALSE);
               cur=GetWord(&dict,word->wordName,FALSE);
               if (fnd!=NULL && !(firstOnly && cur!=NULL)) {
                  n++;
                  cur=GetWord(&dict,word->wordName,TRUE);
                  if (word->pron==NULL) m++;
                  for (pron=word->pron;pron!=NULL;pron=pron->next) {
                     if (remDup) {
                        /* look for an identical pron already stored */
                        for (chk=cur->pron;chk!=NULL;chk=chk->next) {
                           if (chk->nphones!=pron->nphones ||
                               chk->prob!=pron->prob)
                              continue;
                           for(l=0;l<chk->nphones;l++)
                              if (chk->phones[l]!=pron->phones[l]) break;
                           if (l==chk->nphones) break;
                        }
                        /* chk is non-NULL only when a duplicate was found */
                        if (chk!=NULL) continue;
                     }
                     p++;
                     /* pron->prob is exponentiated before being handed to
                        NewPron; values at or below log(MINPRONPROB) are
                        passed as 0.0 */
                     NewPron(&dict,cur,pron->nphones,pron->phones,
                             pron->outSym==NULL?blank:pron->outSym,
                             pron->prob>log(MINPRONPROB)?exp(pron->prob):0.0);
                  }
               }
            }
         }
      }
      if (trace&T_TOP) {
         printf("Copied %d words (%d null,%d prons) from %s\n",n,m,p,d->fname);
         fflush(stdout);
      }
      ClearVocab(&tDict);
   }
   if (wlist!=NULL) {
      /* Check dictionary covers word list.  The code passed to HError is
         negative for the first missing words and becomes positive once
         more than ten have been reported. */
      for (i=0,n=0; i<VHASHSIZE; i++) {
         for (word=wlist->wtab[i]; word!=NULL; word=word->next) {
            fnd=GetWord(&dict,word->wordName,FALSE);
            if (fnd==NULL)
               HError((n++>10)?16930:-16930,
                      "HLMCopy: Cannot find definition for word %s",
                      word->wordName->name);
         }
      }
      if (n>0)
         HError(9999,"HLMCopy: Dictionary missing required words");
   }
   /* Write dictionary */
   if(WriteDict(fn,&dict)<SUCCESS)
      HError(3214,"HLMCopy: WriteDict failed");
   if (trace&T_TOP) {
      /* report the file actually written (the fn parameter), not a global */
      printf("Wrote dictionary to %s\n",fn); fflush(stdout);
   }
}
示例#4
0
/* main: LMCopy entry point.

   Command line:  [options] srcLM tgtLM

   Initialises the libraries, processes the option switches, loads the
   source language model (restricted to the -w word list if one was
   given), rebuilds it from counts when necessary, optionally replaces
   the unigrams and truncates the n-gram order, saves the model to the
   target file and, if -v was given, writes a merged dictionary.
   Argument errors are reported through HError. */
int main(int argc, char *argv[])
{
   int i;
   char *s,*c;
   char fmt[256];
   dictList *dEntry,*d;

   InitShell(argc,argv,prog_version,prog_vc_id);
   InitMem();
   InitMath();
   InitWave();
   InitLabel();
   InitDict();
   InitWMap();
   InitLUtil();
   InitLModel();
   InitPCalc();

   if (!InfoPrinted() && NumArgs() == 0)
      ReportUsage();
   if (NumArgs() == 0) Exit(EXIT_SUCCESS);

   SetConfParms();

   CreateHeap(&langHeap,"langHeap",MSTAK,1,0.5,5000,40000);

   for (i=1; i<=LM_NSIZE; i++) cutOff[i] = 0;

   while (NextArg() == SWITCHARG) {
      s = GetSwtArg();
      if (strlen(s)!=1)
         HError(16919,"Bad switch %s; must be single letter",s);
      switch(s[0]){
         case 'c':
            /* -c n c : cut-off c for n-grams of order n */
            i = GetChkedInt(2,LM_NSIZE,s);
            cutOff[i] = GetChkedInt(1,1000,s);
            break;
         case 'd':
            /* -d f : append f to the list of input dictionaries */
            if (NextArg()!=STRINGARG)
               HError(16919,"LMCopy: Input dictionary file name expected");
            dEntry=New(&gcheap,sizeof(dictList));
            dEntry->fname=GetStrArg(); dEntry->next=NULL;
            if (dList==NULL) dList=dEntry;
            else {
               for (d=dList;d->next!=NULL;d=d->next);
               d->next=dEntry;
            }
            break;
         case 'f':
            /* -f fmt : output format name, matched case-insensitively */
            if (NextArg()!=STRINGARG)
               HError(16919,"LMCopy: LM format name expected");
            /* bounded copy: the argument comes straight from the command
               line and must not overrun fmt; an over-long name is cut
               short and then rejected below as unrecognised */
            strncpy(fmt,GetStrArg(),sizeof(fmt)-1);
            fmt[sizeof(fmt)-1]='\0';
            /* toupper needs an unsigned char value */
            for (c=fmt; *c!=0; c++)
               *c=(char)toupper((unsigned char)*c);
            if (strcmp(fmt, LM_TXT_TEXT)==0)
               saveFmt = LMF_TEXT;
            else if (strcmp(fmt, LM_TXT_BINARY)==0)
               saveFmt = LMF_BINARY;
            else if (strcmp(fmt, LM_TXT_ULTRA)==0)
               saveFmt = LMF_ULTRA;
            else
               HError(16919,"Unrecognised LM format, should be one of [%s, %s, %s]",
                      LM_TXT_TEXT, LM_TXT_BINARY, LM_TXT_ULTRA);
            break;
         case 'm':
            remDup=FALSE;
            break;
         case 'n':
            nSize = GetChkedInt(1,LM_NSIZE,s); break;
         case 'o':
            firstOnly=TRUE;
            break;
         case 'u':
            if (NextArg()!=STRINGARG)
               HError(16919,"LMCopy: Unigram file name expected");
            uniFn = GetStrArg();
            break;
         case 'v':
            if (NextArg()!=STRINGARG)
               HError(16919,"LMCopy: Dictionary output file name expected");
            outDictFn = GetStrArg();
            break;
         case 'w':
            if (NextArg() != STRINGARG)
               HError(16919,"LMCopy: Word list file name expected");
            wlistFN = GetStrArg();
            break;
         case 'T':
            trace = GetChkedInt(0,077, s); break;
         default:
            HError(16919,"LMCopy: Unknown switch %s",s);
      }
   }
   if (NextArg()!=STRINGARG)  /* source language model file name */
      HError(16919, "Input language model filename expected");
   srcFN = GetStrArg();

   if (NextArg()!=STRINGARG)  /* target language model file name */
      HError(16919, "Output language model filename expected");
   tgtFN= GetStrArg();

   if (wlistFN!=NULL) {
      /* the word list is read twice: once as a vocabulary (used later by
         MakeDictionary) and once as the word list handed to the loader */
      InitVocab(&vocab);
      if(ReadDict(wlistFN,&vocab) < SUCCESS)
         HError(16913,"Could not read dict in %s", wlistFN);
      if (trace&T_TOP) {
         printf("Loaded %d words from %s\n",vocab.nwords,wlistFN);
         fflush(stdout);
      }
      voc = &vocab;
      CreateWordList(wlistFN,&wlist,10);
      lm = LoadLangModel(srcFN,&wlist,1.0,LMP_FLOAT|LMP_COUNT,&langHeap);
   } else {
      voc = NULL;
      lm = LoadLangModel(srcFN,NULL,1.0,LMP_FLOAT|LMP_COUNT,&langHeap);
   }
   if (trace&T_TOP) {
      printf("Loaded model from %s\n",srcFN);
      fflush(stdout);
   }
   if (lm->probType==LMP_COUNT) {
      RebuildLM(lm, cutOff, NULL, LMP_FLOAT); /* GLM there was no threshold before! */
   }
   if (uniFn!=NULL)
      ReplaceUnigrams(uniFn,lm);
   /* -n can only lower the order of the model, never raise it */
   if (nSize>0 && nSize<lm->nSize)
      lm->nSize = nSize;
#ifdef HTK_CRYPT
   if (lm->encrypt && saveFmt==LMF_TEXT)
      saveFmt = LMF_BINARY;
#endif
   /* unigrams are always saved as text; higher orders use saveFmt */
   for (i=1;i<=lm->nSize;i++)
      lm->gInfo[i].fmt = (i==1) ? LMF_TEXT : saveFmt;
   SaveLangModel(tgtFN,lm);
   if (trace&T_TOP) {
      printf("Wrote model to %s\n",tgtFN);
      fflush(stdout);
   }
   if (outDictFn) {
      MakeDictionary(outDictFn,dList,voc);
   }

   Exit(EXIT_SUCCESS);
   return EXIT_SUCCESS; /* never reached -- make compiler happy */
}   
示例#5
0
#include "HLM.h"



/* BuildType: the kind of network to build; main starts from `unknown`.
   The other values presumably correspond to the -n, -m, -x and -w
   options and the default word loop -- confirm against the option
   handling in main. */
typedef enum {unknown, wordLoop, boBiGram, matBiGram, multiLat, wordPair} BuildType;



static int trace     = 0;           /* Trace flags */



static LabId enterId;               /* id of !ENTRY label in ngram */

static LabId exitId;                /* id of !EXIT label in ngram */

static LabId bStartId=NULL;         /* id of start bracket */

static LabId bEndId=NULL;           /* id of end bracket */



static LabId unknownId;             /* id of unknown label in ngram */

static Boolean zapUnknown = FALSE;  /* zap unknown symbols from bigram */



/* Heap created in main under the name "HBuild Stack" */
MemHeap buildStack;



/* ---------------- Configuration Parameters --------------------- */



/* Configuration parameters returned by GetConfig in SetConfParms */
static ConfParam *cParm[MAXGLOBS];

static int nParm = 0;            /* total num params */



/* ---------------- Process Command Line ------------------------- */



/* SetConfParms: fetch the HBUILD configuration parameters and apply
   those this tool understands (only TRACE at present) */
void SetConfParms(void)
{
   int value;

   nParm = GetConfig("HBUILD", TRUE, cParm, MAXGLOBS);
   if (nParm <= 0)
      return;                       /* nothing configured for this tool */

   if (GetConfInt(cParm,nParm,"TRACE",&value))
      trace = value;
}



/* ReportUsage: print the command line synopsis, the tool specific
   options with their defaults, and then the standard options */
void ReportUsage(void)
{
   /* the whole option table is one literal; it contains no conversion
      specifications so it is safe to use as the format string */
   printf("\nUSAGE: HBuild [options] wordList latFile\n\n"
          " Option                                       Default\n\n"
          " -b      binary lattice output                ASCII\n"
          " -m s    load matrix bigram from s            off\n"
          " -n s    load back-off bigram from s          off\n"
          " -s s1 s2 s1/s2 are bigram start/end labels   !ENTER !EXIT\n"
          " -t s1 s2 bracket word-loop/pair with s1 s2   off\n"
          " -u s    set unknown symbol to s              !NULL\n"
          " -w s    load word-pair grammar from s        off\n"
          " -x s    load multi-level lattice from s      off\n"
          " -z      ignore ngrams with unknown symbol    off\n");
   PrintStdOpts("");
   printf("\n\n");
}



int main(int argc, char *argv[])

{

   char *wordListFn,*latFn,*ipFn=NULL;

   LModel *bigramLm;

   BuildType bType = unknown;

   Boolean saveLatBin = FALSE;

   LatFormat format = HLAT_LMLIKE;

   Lattice *lat,*ipLat;

   Vocab voc;

   char  *s;



   Lattice *ProcessWordLoop(MemHeap *latHeap, Vocab *voc);

   Lattice *ProcessBiGram(MemHeap *latHeap, Vocab *voc, LModel *biLM);

   void SaveLattice(Lattice *lat, char *latFn, LatFormat format);

   Lattice *LoadLattice(MemHeap *latHeap, char *latFn, Vocab *voc,

                        Boolean shortArc);

   Lattice *ProcessWordPair(MemHeap *latHeap, Vocab *voc, char *fn);



   if(InitShell(argc,argv,hbuild_version,hbuild_vc_id)<SUCCESS)

      HError(3000,"HBuild: InitShell failed");

   InitMem();   InitLabel();

   InitMath();  

   InitDict();  InitNet();  

   InitLM();



   CreateHeap(&buildStack, "HBuild Stack",  MSTAK, 1, 0.0, 100000, LONG_MAX );



   if (!InfoPrinted() && NumArgs() == 0)

      ReportUsage();

   if (NumArgs() == 0) Exit(0);

   SetConfParms();
示例#6
0
/* Initialise: set up global data structures */
void Initialise(void)
{
   Boolean eSep;
   int s;

   /* Load hmms, convert to inverse DiagC */
   if(MakeHMMSet(&hset,hmmListFn)<SUCCESS) 
      HError(3228,"Initialise: MakeHMMSet failed");
   if(LoadHMMSet(&hset,hmmDir,hmmExt)<SUCCESS) 
      HError(3228,"Initialise: LoadHMMSet failed");
   ConvDiagC(&hset,TRUE);
   
   /* Create observation and storage for input buffer */
   SetStreamWidths(hset.pkind,hset.vecSize,hset.swidth,&eSep);
   obs=MakeObservation(&gstack,hset.swidth,hset.pkind,
                       hset.hsKind==DISCRETEHS,eSep);

   /* sort out masks just in case using adaptation */
   if (xfInfo.inSpkrPat == NULL) xfInfo.inSpkrPat = xfInfo.outSpkrPat; 
   if (xfInfo.paSpkrPat == NULL) xfInfo.paSpkrPat = xfInfo.outSpkrPat; 

   if (xfInfo.useOutXForm || (update>0)) {
      CreateHeap(&regHeap,   "regClassStore",  MSTAK, 1, 0.5, 1000, 8000 );
      /* This initialises things - temporary hack - THINK!! */
      CreateAdaptXForm(&hset, "tmp");
      /* initialise structures for the f-b frame-state alignment pass */
      utt = (UttInfo *) New(&regHeap, sizeof(UttInfo));
      fbInfo = (FBInfo *) New(&regHeap, sizeof(FBInfo));
      /* initialise a recogniser for frame/state alignment purposes */
      alignpsi=InitPSetInfo(&hset);
      alignvri=InitVRecInfo(alignpsi,1,TRUE,FALSE);
      SetPruningLevels(alignvri,0,genBeam,-LZERO,0.0,tmBeam);
      InitUttInfo(utt, FALSE);
      InitialiseForBack(fbInfo, &regHeap, &hset,
                        (UPDSet) (UPXFORM), genBeam*2.0, genBeam*2.0, 
                        genBeam*4.0+1.0, 10.0);
      utt->twoDataFiles = FALSE;
      utt->S = hset.swidth[0]; 
      AttachPreComps(&hset,hset.hmem);
   }
    
   CreateHeap(&bufHeap,"Input Buffer heap",MSTAK,1,0.0,50000,50000);
   CreateHeap(&repHeap,"Replay Buffer heap",MSTAK,1,0.0,50000,50000);
   
   maxM = MaxMixInSet(&hset);
   for (s=1; s<=hset.swidth[0]; s++)
      maxMixInS[s] = MaxMixInSetS(&hset, s);
   if (trace&T_TOP) {
      printf("Read %d physical / %d logical HMMs\n",
             hset.numPhyHMM,hset.numLogHMM);  fflush(stdout);
   }
   
   /* Initialise recogniser */
   if (nToks>1) nBeam=genBeam;
   psi=InitPSetInfo(&hset);
   vri=InitVRecInfo(psi,nToks,models,states);

   /* Read dictionary and create storage for lattice */
   InitVocab(&vocab);   
   if(ReadDict(dictFn,&vocab)<SUCCESS) 
      HError(3213, "Main: ReadDict failed");
   CreateHeap(&ansHeap,"Lattice heap",MSTAK,1,0.0,4000,4000);
   if (trace & T_MEM){
      printf("Memory State After Initialisation\n");
      PrintAllHeapStats();
   }
}