Exemple #1
0
int main(int argc, char *argv[])
{
   int i;
   char *s,*c;
   char fmt[256];
   dictList *dEntry,*d;

   InitShell(argc,argv,prog_version,prog_vc_id);
   InitMem();
   InitMath();
   InitWave();
   InitLabel();
   InitDict();
   InitWMap();
   InitLUtil();
   InitLModel();
   InitPCalc();

   if (!InfoPrinted() && NumArgs() == 0)
      ReportUsage();
   if (NumArgs() == 0) Exit(EXIT_SUCCESS);

   SetConfParms();

   CreateHeap(&langHeap,"langHeap",MSTAK,1,0.5,5000,40000);

   for (i=1; i<=LM_NSIZE; i++) cutOff[i] = 0;

   while (NextArg() == SWITCHARG) {
      s = GetSwtArg();
      if (strlen(s)!=1) 
         HError(16919,"Bad switch %s; must be single letter",s);
      switch(s[0]){
         case 'c':
           i = GetChkedInt(2,LM_NSIZE,s); 
	   cutOff[i] = GetChkedInt(1,1000,s);
	   break;
         case 'd':
	   if (NextArg()!=STRINGARG)
	     HError(16919,"LMCopy: Input dictionary file name expected");
	   dEntry=New(&gcheap,sizeof(dictList));
	   dEntry->fname=GetStrArg(); dEntry->next=NULL;
	   if (dList==NULL) dList=dEntry;
	   else {
	     for (d=dList;d->next!=NULL;d=d->next);
	     d->next=dEntry;
	   }
	   break;
         case 'f':
	   strcpy(fmt,GetStrArg());
	   for (c=fmt; *c!=0; *c=toupper(*c), c++);
	    if (strcmp(fmt, LM_TXT_TEXT)==0)
               saveFmt = LMF_TEXT;
	    else if (strcmp(fmt, LM_TXT_BINARY)==0)
               saveFmt = LMF_BINARY;
	    else if (strcmp(fmt, LM_TXT_ULTRA)==0)
               saveFmt = LMF_ULTRA;
	    else
	       HError(16919,"Unrecognised LM format, should be one of [%s, %s, %s]",
		      LM_TXT_TEXT, LM_TXT_BINARY, LM_TXT_ULTRA);
	   break;
	 case 'm':
	   remDup=FALSE;
	   break;
         case 'n':
            nSize = GetChkedInt(1,LM_NSIZE,s); break;
	 case 'o':
	   firstOnly=TRUE;
	   break;
	 case 'u':
	   if (NextArg()!=STRINGARG)
	     HError(16919,"LMCopy: Unigram file name expected");
	   uniFn = GetStrArg();
	   break;
         case 'v':
	   if (NextArg()!=STRINGARG)
	     HError(16919,"LMCopy: Dictionary output file name expected");
	   outDictFn = GetStrArg();
	   break;
         case 'w':
	    if (NextArg() != STRINGARG)
	       HError(16919,"LPlex: Word list file name expected");
	    wlistFN = GetStrArg();
	    break;
	 case 'T':
	    trace = GetChkedInt(0,077, s); break;
         default:
            HError(16919,"LMPlex: Unknown switch %s",s);
      }
   }
   if (NextArg()!=STRINGARG)  /* load the language model */
      HError(16919, "Input language model filename expected");
   srcFN = GetStrArg();

   if (NextArg()!=STRINGARG)  /* load the language model */
      HError(16919, "Output language model filename expected");
   tgtFN= GetStrArg();

   if (wlistFN!=NULL) {
      InitVocab(&vocab);   
      if(ReadDict(wlistFN,&vocab) < SUCCESS) 
	 HError(16913,"Could not read dict in %s", wlistFN);
      if (trace&T_TOP) {
	printf("Loaded %d words from %s\n",vocab.nwords,wlistFN); 
	fflush(stdout);
      }
      voc = &vocab;
      CreateWordList(wlistFN,&wlist,10);
      lm = LoadLangModel(srcFN,&wlist,1.0,LMP_FLOAT|LMP_COUNT,&langHeap);
   } else {
      voc = NULL;
      lm = LoadLangModel(srcFN,NULL,1.0,LMP_FLOAT|LMP_COUNT,&langHeap);
   }
   if (trace&T_TOP) {
     printf("Loaded model from %s\n",srcFN); 
     fflush(stdout);
   }
   if (lm->probType==LMP_COUNT) {
      RebuildLM(lm, cutOff, NULL, LMP_FLOAT); /* GLM there was no threshold before! */
   }
   if (uniFn!=NULL)
      ReplaceUnigrams(uniFn,lm);
   if (nSize>0 && nSize<lm->nSize)
      lm->nSize = nSize;
#ifdef HTK_CRYPT
   if (lm->encrypt && saveFmt==LMF_TEXT)
     saveFmt = LMF_BINARY;
#endif
   for (i=1;i<=lm->nSize;i++)
      lm->gInfo[i].fmt = (i==1) ? LMF_TEXT : saveFmt;
   SaveLangModel(tgtFN,lm);
   if (trace&T_TOP) {
     printf("Wrote model to %s\n",tgtFN); 
     fflush(stdout);
   }
   if (outDictFn) {
      MakeDictionary(outDictFn,dList,voc);
   }

   Exit(EXIT_SUCCESS);
   return EXIT_SUCCESS; /* never reached -- make compiler happy */
}   
Exemple #2
0
/* Initialise: perform global initialisations
 *
 * Works entirely on file-level state.  In order it:
 *   - looks up the label for nulName and stores it in nulClass;
 *   - sets the weight of model 0 to 1 minus the sum of the other weights;
 *   - loads every model in lmInfo (rebuilding those that hold counts) and
 *     attaches access info to it;
 *   - defaults testInfo to the order of model 0 when no test was requested;
 *   - builds wList, either from wlistFN or from the union of the word (or
 *     class) maps of all models, storing a 1-based index in each label's aux;
 *   - checks that the OOV (unless skipOOV), sentence start and sentence end
 *     symbols received an index;
 *   - builds l2nId, one LabId -> NameId lookup array per model, and checks
 *     that every word is known to at least one model;
 *   - allocates and clears eqId, then calls LinkEquiv();
 *   - opens outStream when outStreamFN is set.
 */
static void Initialise(void)
{
   int i,j,ndx,nEnt;
   float x;
   LMInfo *li;
   Boolean inLM;
   LabId *wid,lab;
   NameId *na,nid;
   Boolean isPipe;

   nulClass = GetLabId(nulName,TRUE);

   /* normalise weights: model 0 receives whatever the others leave over */
   for (x=0.0, i=1; i<nLModel; i++)
      x += lmInfo[i].weight;
   lmInfo[0].weight = 1.0-x;

   /* load all models */
   for (li=lmInfo, i=0; i<nLModel; i++, li++) {
      if (trace&T_TOP)
         printf("Loading language model from %s\n",li->fn);
      li->lm = LoadLangModel(li->fn,NULL,1.0,LMP_LOG|LMP_COUNT,&permHeap);
      if (li->lm->probType==LMP_COUNT)
         RebuildLM(li->lm,cutOff,wdThresh,LMP_LOG);
      AttachAccessInfo(li->lm);
   }

   if (trace&T_TOP) {
      printf("Using language model(s): \n");
      for (li=lmInfo,i=0; i<nLModel; i++,li++)
         printf("  %d-gram %s, weight %.2f\n",li->lm->nSize,li->fn,li->weight);
   }
   if (numTests==0) {
      numTests=1; testInfo[0] = lmInfo[0].lm->nSize;
   }

   /* load or create word list */
   if (wlistFN!=NULL) {
      /* load word list from file */
      CreateWordList(wlistFN,&wList,nWords+10);
      nWords = wList.used;
      for (wid=wList.id, i=0; i<nWords; i++,wid++) /* assign lookup indices */
         (*wid)->aux = (Ptr) (i+1);
   } else {
      /* derive word list from LMs */

      /* pass 1: give every distinct name a 1-based index, counting nWords */
      for (nWords=0,li=lmInfo, i=0; i<nLModel; i++, li++) {
         /* class LMs expose their words through classBM/classW,
            all others through binMap/vocSize */
         if (li->lm->classLM) {
            na = li->lm->classBM; nEnt = li->lm->classW;
         } else {
            na = li->lm->binMap;  nEnt = li->lm->vocSize;
         }
         for (j=0; j<nEnt; j++) {
            lab = GetLabId(na[j+1]->name,TRUE);   /* map entries start at 1 */
            if (lab->aux==NULL)
               lab->aux = (Ptr) (++nWords);
         }
      }

      /* pass 2: now that the size is known, fill the list by index */
      CreateWordList(NULL,&wList,nWords+10);
      for (li=lmInfo, i=0; i<nLModel; i++, li++) {
         if (li->lm->classLM) {
            na = li->lm->classBM; nEnt = li->lm->classW;
         } else {
            na = li->lm->binMap;  nEnt = li->lm->vocSize;
         }
         for (j=0; j<nEnt; j++) {
            lab = GetLabId(na[j+1]->name,TRUE);
            ndx = ((int) lab->aux) - 1;
            wList.id[ndx] = lab;
         }
      }
      wList.used = nWords;
   }
   if (trace&T_TOP) {
      printf("Found %d unique words in %d model(s)\n",nWords,nLModel);
      fflush(stdout);
   }
   /* a NULL aux means the symbol never received a word list index */
   if (unkId->aux==NULL && !skipOOV) {
      HError(16620,"LPlex: OOV class symbol %s not in word list",unkId->name);
   }
   if (sstId->aux==NULL) {
      HError(16620,"LPlex: sentence start symbol %s not in word list",sstId->name);
   }
   if (senId->aux==NULL) {
      HError(16620,"LPlex: sentence end symbol %s not in word list",senId->name);
   }

   /* create lookup table */
   l2nId = (NameId **) New(&permHeap,nLModel*sizeof(NameId *));
   /* create LabId -> NameId lookup arrays (one per LM) */
   for (li=lmInfo, i=0; i<nLModel; i++, li++) {
      /* indexed by the 1-based value stored in aux, hence the extra slots */
      na = (NameId *) New(&permHeap,(nWords+2)*sizeof(NameId));
      for (wid = wList.id, j=0; j<nWords; j++, wid++) {
         if (li->lm->classLM)
            nid = na[(int) ((*wid)->aux)] = GetNameId(li->lm->classH, (*wid)->name, FALSE);
         else
            nid = na[(int) ((*wid)->aux)] = GetNameId(li->lm->htab, (*wid)->name, FALSE);
#ifdef SANITY
         if (nid==NULL)
            HError(-16625,"Unable to find word %s in model %s\n",(*wid)->name,li->fn);
#endif
      }
      l2nId[i] = na;
   }

   /* ensure words present at least in one model */
   for (wid = wList.id, j=0; j<nWords; j++, wid++) {
      for (inLM=FALSE,i=0; i<nLModel; i++)
         if (l2nId[i][(int) ((*wid)->aux)]!=NULL)
            inLM = TRUE;
      if (!inLM)
         HError(16625,"Unable to find word %s in any model\n",(*wid)->name);
   }

   /* create equivalence class lookup array */
   eqId = (LabId *) New(&permHeap,(nWords+NumEquiv()+2)*sizeof(LabId));
   for (wid = wList.id, i=0; i<nWords; i++, wid++) {
      eqId[(int) ((*wid)->aux)] = NULL;
   }

   /* link equivalence classes */
   LinkEquiv();

   /* open output stream */
   if (outStreamFN != NULL)
      if ((outStream = FOpen(outStreamFN,NoOFilter,&isPipe)) == NULL)
         HError(16610,"Initialise: unable to open output file %s",outStreamFN);

}
Exemple #3
0
int main(int argc, char *argv[])
{
   int i;
   char *s,*c;
   char fmt[256];

   InitShell(argc,argv,lnorm_version,lnorm_vc_id);
   InitMem();
   InitMath();
   InitWave();
   InitLabel();
   InitWMap();
   InitLUtil();
   InitLModel();
   InitPCalc();

   if (!InfoPrinted() && NumArgs() == 0)
      ReportUsage();
   if (NumArgs() == 0) Exit(EXIT_SUCCESS);

   SetConfParms();

   CreateHeap(&langHeap,"langHeap",MSTAK,1,0.5,5000,40000);

   for (i=1; i<=LM_NSIZE; i++) cutOff[i] = 0, wdThresh[i] = 0.0;
   while (NextArg() == SWITCHARG) {
      s = GetSwtArg();
      if (strlen(s)!=1) 
         HError(16519,"Bad switch %s; must be single letter",s);
      switch(s[0]){
         case 'c':
            i = GetChkedInt(2,LM_NSIZE,s); 
	    cutOff[i] = GetChkedInt(1,1000,s);
	    break;
         case 'd':
            i = GetChkedInt(2,LM_NSIZE,s); 
	    wdThresh[i] = GetChkedFlt(0.0,1E10,s);
	    break;
         case 'f':
	   strcpy(fmt,GetStrArg());
	   for (c=fmt; *c!=0; *c=toupper(*c),c++);
           if (strcmp(fmt, LM_TXT_TEXT)==0)
              saveFmt = LMF_TEXT;
           else if (strcmp(fmt, LM_TXT_BINARY)==0)
              saveFmt = LMF_BINARY;
           else if (strcmp(fmt, LM_TXT_ULTRA)==0)
              saveFmt = LMF_ULTRA;
	   else
              HError(16519,"Unrecognised LM format, should be one of [%s, %s, %s]",
                     LM_TXT_TEXT, LM_TXT_BINARY, LM_TXT_ULTRA);
	   break;
         case 'n':
            nSize = GetChkedInt(1,LM_NSIZE,s); break;
         case 'w':
	    if (NextArg() != STRINGARG)
	       HError(16519,"LPlex: Word list file name expected");
	    wlistFN = GetStrArg();
	    break;
	 case 'T':
	    trace = GetChkedInt(0,077, s); break;
         default:
            HError(16519,"LMPlex: Unknown switch %s",s);
      }
   }
   if (NextArg()!=STRINGARG)  /* load the language model */
      HError(16519, "Input language model filename expected");
   srcFN = GetStrArg();

   if (NextArg()!=STRINGARG)  /* load the language model */
      HError(16519, "Output language model filename expected");
   tgtFN= GetStrArg();

   if (wlistFN!=NULL) {
      CreateWordList(wlistFN,&wlist,10);
      lm = LoadLangModel(srcFN,&wlist,1.0,LMP_FLOAT|LMP_COUNT,&langHeap);
   } else {
      lm = LoadLangModel(srcFN,NULL,1.0,LMP_FLOAT|LMP_COUNT,&langHeap);
   }
   if (lm->probType==LMP_COUNT) {
      RebuildLM(lm,cutOff,wdThresh,LMP_FLOAT);
   } else {
      NormaliseLM(lm);
   }
   if (nSize>0 && nSize<lm->nSize)
      lm->nSize = nSize;
   for (i=1;i<=lm->nSize;i++)
      lm->gInfo[i].fmt = (i==1) ? LMF_TEXT : saveFmt;
   SaveLangModel(tgtFN,lm);

   Exit(EXIT_SUCCESS);
   return EXIT_SUCCESS; /* never reached -- make compiler happy */
}