/* Process utterances in the control file (-ctlfn argument) */ static void process_ctlfile ( void ) { FILE *ctlfp, *sentfp, *mllrctlfp; char *ctlfile, *cepdir, *cepext, *sentfile, *outsentfile, *mllrctlfile; char line[1024], cepfile[1024], ctlspec[1024]; /* CHANGE BY BHIKSHA: ADDED veclen AS A VARIABLE, 6 JAN 98 */ int32 ctloffset, ctlcount, veclen, sf, ef, nfr; /* END OF CHANGES BY BHIKSHA */ char mllrfile[4096], prevmllr[4096], sent[16384]; char uttid[1024]; int32 i, k; float32 **mfc; ctlfile = (char *) cmd_ln_access("-ctlfn"); if ((ctlfp = fopen (ctlfile, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", ctlfile); if ((mllrctlfile = (char *) cmd_ln_access("-mllrctlfn")) != NULL) { if ((mllrctlfp = fopen (mllrctlfile, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", mllrctlfile); } else mllrctlfp = NULL; prevmllr[0] = '\0'; sentfile = (char *) cmd_ln_access("-insentfn"); if ((sentfp = fopen (sentfile, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", sentfile); if ((outsentfile = (char *) cmd_ln_access("-outsentfn")) != NULL) { if ((outsentfp = fopen (outsentfile, "w")) == NULL) E_FATAL("fopen(%s,r) failed\n", outsentfile); } E_INFO("Processing ctl file %s\n", ctlfile); cepdir = (char *) cmd_ln_access("-cepdir"); cepext = (char *) cmd_ln_access("-cepext"); assert ((cepdir != NULL) && (cepext != NULL)); /* BHIKSHA: ADDING VECLEN TO ALLOW VECTORS OF DIFFERENT SIZES */ veclen = *((int32 *) cmd_ln_access("-ceplen")); /* END CHANGES, 6 JAN 1998, BHIKSHA */ ctloffset = *((int32 *) cmd_ln_access("-ctloffset")); if (! 
cmd_ln_access("-ctlcount")) ctlcount = 0x7fffffff; /* All entries processed if no count specified */ else ctlcount = *((int32 *) cmd_ln_access("-ctlcount")); if (ctlcount == 0) { E_INFO("-ctlcount argument = 0!!\n"); fclose (ctlfp); fclose (sentfp); if (outsentfp) fclose (outsentfp); return; } /* Skipping initial offset */ if (ctloffset > 0) E_INFO("Skipping %d utterances in the beginning of control file\n", ctloffset); while ((ctloffset > 0) && (fgets(line, sizeof(line), ctlfp) != NULL)) { if (sscanf (line, "%s", ctlspec) > 0) { if (fgets (sent, sizeof(sent), sentfp) == NULL) { E_ERROR("EOF(%s)\n", sentfile); ctlcount = 0; break; } if (mllrctlfp) { if (fscanf (mllrctlfp, "%s", mllrfile) != 1) E_FATAL ("Unexpected EOF(%s)\n", mllrctlfile); } --ctloffset; } } /* Process the specified number of utterance or until end of control file */ while ((ctlcount > 0) && (fgets(line, sizeof(line), ctlfp) != NULL)) { printf ("\n"); E_INFO("Utterance: %s", line); sf = 0; ef = (int32)0x7ffffff0; if ((k = sscanf (line, "%s %d %d %s", ctlspec, &sf, &ef, uttid)) <= 0) continue; /* Empty line */ if ((k == 2) || ( (k >= 3) && ((sf >= ef) || (sf < 0))) ) E_FATAL("Bad ctlfile line: %s\n", line); if (k < 4) { /* Create utt-id from mfc-filename (and sf/ef if specified) */ for (i = strlen(ctlspec)-1; (i >= 0) && (ctlspec[i] != '/'); --i); if (k == 3) sprintf (uttid, "%s_%d_%d", ctlspec+i+1, sf, ef); else strcpy (uttid, ctlspec+i+1); } if (mllrctlfp) { if (fscanf (mllrctlfp, "%s", mllrfile) != 1) E_FATAL ("Unexpected EOF(%s)\n", mllrctlfile); if (strcmp (prevmllr, mllrfile) != 0) { float32 ***A, **B; int32 gid, sid; uint8 *mgau_xform; gauden_mean_reload (g, (char *) cmd_ln_access("-meanfn")); if (mllr_read_regmat (mllrfile, &A, &B, featlen, n_feat) < 0) E_FATAL("mllr_read_regmat failed\n"); mgau_xform = (uint8 *) ckd_calloc (g->n_mgau, sizeof(uint8)); /* Transform each non-CI mixture Gaussian */ for (sid = 0; sid < sen->n_sen; sid++) { if (mdef->cd2cisen[sid] != sid) { /* Otherwise it's a CI 
senone */ gid = sen->mgau[sid]; if (! mgau_xform[gid]) { mllr_norm_mgau (g->mean[gid], g->n_density, A, B, featlen, n_feat); mgau_xform[gid] = 1; } } } ckd_free (mgau_xform); mllr_free_regmat (A, B, featlen, n_feat); strcpy (prevmllr, mllrfile); } } if (ctlspec[0] != '/') sprintf (cepfile, "%s/%s.%s", cepdir, ctlspec, cepext); else sprintf (cepfile, "%s.%s", ctlspec, cepext); /* Read utterance transcript */ if (fgets (sent, sizeof(sent), sentfp) == NULL) { E_ERROR("EOF(%s)\n", sentfile); break; } /* Strip utterance id from the end of the transcript */ for (k = strlen(sent) - 1; (k > 0) && ((sent[k] == '\n') || (sent[k] == '\t') || (sent[k] == ' ')); --k); if ((k > 0) && (sent[k] == ')')) { for (--k; (k >= 0) && (sent[k] != '('); --k); if ((k >= 0) && (sent[k] == '(')) { sent[k] = '\0'; /* Check that uttid in transcript and control file match */ for (i = ++k; sent[i] && (sent[i] != ')') && (sent[i] != '\n') && (sent[i] != '\t') && (sent[i] != ' '); i++); sent[i] = '\0'; if (id_cmp (sent+k, uttid) != 0) E_ERROR("Uttid mismatch: ctlfile = \"%s\"; transcript = \"%s\"\n", uttid, sent+k); } } /* Read and process mfc file */ /* CHANGE BY BHIKSHA; PASSING VECLEN TO s2mfc_read(), 6 JAN 98 */ /* Read mfc file */ /* HACK HACKA HACK BHIKSHA */ { int32 asf, aef; asf = sf; sf = asf - 4; aef = ef; ef = aef + 4; if (sf < 0 ) { E_ERROR("Utterance %s begin %d < 4; ignored\n", uttid, asf); return; } if ((nfr = s2mfc_read (cepfile, sf, ef, &mfc, veclen)) <= 0) E_ERROR("Utt %s: MFC file read (%s) failed\n", uttid, cepfile); /* END CHANGES BY BHIKSHA */ else { E_INFO ("%d mfc frames\n", nfr-8); /* -8 HACK HACKA HACK */ /* Align utterance */ align_utt (sent, mfc+4, nfr-8, ctlspec, uttid); /* +4 HACKA HACK */ } } /* END HACK HACKA HACK */ --ctlcount; } printf ("\n"); while (fgets(line, sizeof(line), ctlfp) != NULL) { if (sscanf (line, "%s", ctlspec) > 0) { E_INFO("Skipping rest of control file beginning with:\n\t%s", line); break; } } fclose (ctlfp); fclose (sentfp); if (outsentfp) fclose 
(outsentfp); if (mllrctlfp) fclose (mllrctlfp); }
/**
 * Reload the Gaussian means and variances and apply an MLLR transform to them.
 *
 * Steps, in order: reload means (-mean) and variances (-var), compact the
 * model, floor the variances (-varfloor), read the regression matrices from
 * mllrfile into ad, optionally read the codebook-to-MLLR-class map, apply the
 * transform, floor the variances again and redo the precomputation.
 *
 * @param ad          Adaptation state; regA/regB/regH/mllr_nclass are filled in
 *                    by mllr_read_regmat().
 * @param g           Gaussian model that is reloaded and transformed in place.
 * @param mllrfile    Regression matrix file to read.
 * @param cb2mllrname Codebook-to-MLLR-class map file; NULL or ".1cls." means
 *                    no map is read and NULL is passed to mllr_norm_mgau().
 * @param mdef        Model definition; its n_sen must equal the number of
 *                    codebooks in the map.
 * @param config      Command-line configuration (-varfloor, -mean, -var).
 *
 * Any failure or mismatch is reported with E_FATAL.
 */
void
adapt_set_mllr(adapt_am_t * ad, mgau_model_t * g, const char *mllrfile,
               const char *cb2mllrname, mdef_t * mdef, cmd_ln_t *config)
{
    int32 *cb2mllr;
    float32 varfloor;

    varfloor = cmd_ln_float32_r(config, "-varfloor");

    /* Reread the gaussian mean from the file again */
    E_INFO("Reloading mean\n");
    mgau_mean_reload(g, cmd_ln_str_r(config, "-mean"));

    /* Reread the gaussian variance from the file again */
    E_INFO("Reloading variance\n");
    mgau_var_reload(g, cmd_ln_str_r(config, "-var"));
    mgau_uninit_compact(g);     /* Delete uninitialized components */

    if (g->mgau[0].var && varfloor > 0.0)
        mgau_var_floor(g, varfloor);    /* Variance floor after above compaction */

#if MLLR_DEBUG
    /*This generates huge amount of information */
    /* mgau_dump(g,1); */
#endif

    /* Read in the mllr matrix; the matrices are used below, so a failed read
     * must not be ignored. */
    if (mllr_read_regmat(mllrfile, &(ad->regA), &(ad->regB), &(ad->regH),
                         &(ad->mllr_nclass), mgau_veclen(g)) < 0)
        E_FATAL("mllr_read_regmat failed (%s)\n", mllrfile);

    if (cb2mllrname && strcmp(cb2mllrname, ".1cls.") != 0) {
        uint32 ncb, nmllr;

        cb2mllr_read(cb2mllrname, &cb2mllr, &ncb, &nmllr);
        if (nmllr != ad->mllr_nclass)
            E_FATAL
                ("Number of classes in cb2mllr does not match mllr (%d != %d)\n",
                 nmllr, ad->mllr_nclass);
        if (ncb != mdef->n_sen)
            E_FATAL
                ("Number of senones in cb2mllr does not match mdef (%d != %d)\n",
                 ncb, mdef->n_sen);
    }
    else
        cb2mllr = NULL;

    /* Transform mean and variance vectors */
    mllr_norm_mgau(g, ad->regA, ad->regB, ad->regH, ad->mllr_nclass, cb2mllr);

    if (g->mgau[0].var && varfloor > 0.0)
        mgau_var_floor(g, varfloor);    /* Variance floor after above transform */

    /* Re-precompute variance things */
    mgau_precomp(g);

#if MLLR_DEBUG
    /*#if 1 */
    mllr_dump(ad->regA, ad->regB, ad->regH, mgau_veclen(g), g->mllr_class,
              cb2mllr);
    /*This generates huge amount of information */
    /*mgau_dump(kbcore_mgau(kb->kbcore),1); */
#endif

    /* Freed last so the debug dump above never sees a dangling pointer */
    ckd_free(cb2mllr);
}