int main_HInit(int argc, char *argv[]) { char *datafn, *s; int nSeg; void Initialise(void); void LoadFile(char *fn); void EstimateModel(void); void SaveModel(char *outfn); if(InitShell(argc,argv,hinit_version,hinit_vc_id)<SUCCESS) HError(2100,"HInit: InitShell failed"); InitMem(); InitLabel(); InitMath(); InitSigP(); InitWave(); InitAudio(); InitVQ(); InitModel(); if(InitParm()<SUCCESS) HError(2100,"HInit: InitParm failed"); InitTrain(); InitUtil(); if (!InfoPrinted() && NumArgs() == 0) ReportUsageHInit(); if (NumArgs() == 0) return(0); SetConfParmsHInit(); CreateHMMSet(&hset,&gstack,FALSE); while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(2119,"HInit: Bad switch %s; must be single letter",s); switch(s[0]){ case 'e': epsilon = GetChkedFlt(0.0,1.0,s); break; case 'i': maxIter = GetChkedInt(0,100,s); break; case 'l': if (NextArg() != STRINGARG) HError(2119,"HInit: Segment label expected"); segLab = GetStrArg(); break; case 'm': minSeg = GetChkedInt(1,1000,s); break; case 'n': newModel = FALSE; break; case 'o': outfn = GetStrArg(); break; case 'u': SetuFlags(); break; case 'v': minVar = GetChkedFlt(0.0,10.0,s); break; case 'w': mixWeightFloor = MINMIX * GetChkedFlt(0.0,10000.0,s); break; case 'B': saveBinary = TRUE; break; case 'F': if (NextArg() != STRINGARG) HError(2119,"HInit: Data File format expected"); if((dff = Str2Format(GetStrArg())) == ALIEN) HError(-2189,"HInit: Warning ALIEN Data file format set"); break; case 'G': if (NextArg() != STRINGARG) HError(2119,"HInit: Label File format expected"); if((lff = Str2Format(GetStrArg())) == ALIEN) HError(-2189,"HInit: Warning ALIEN Label file format set"); break; case 'H': if (NextArg() != STRINGARG) HError(2119,"HInit: HMM macro file name expected"); AddMMF(&hset,GetStrArg()); break; case 'I': if (NextArg() != STRINGARG) HError(2119,"HInit: MLF file name expected"); LoadMasterFile(GetStrArg()); break; case 'L': if (NextArg()!=STRINGARG) HError(2119,"HInit: Label file directory expected"); 
labDir = GetStrArg(); break; case 'M': if (NextArg()!=STRINGARG) HError(2119,"HInit: Output macro file directory expected"); outDir = GetStrArg(); break; case 'T': if (NextArg() != INTARG) HError(2119,"HInit: Trace value expected"); trace = GetChkedInt(0,01777,s); break; case 'X': if (NextArg()!=STRINGARG) HError(2119,"HInit: Label file extension expected"); labExt = GetStrArg(); break; default: HError(2119,"HInit: Unknown switch %s",s); } } if (NextArg()!=STRINGARG) HError(2119,"HInit: source HMM file name expected"); hmmfn = GetStrArg(); Initialise(); do { if (NextArg()!=STRINGARG) HError(2119,"HInit: training data file name expected"); datafn = GetStrArg(); LoadFile(datafn); } while (NumArgs()>0); nSeg = NumSegs(segStore); if (nSeg < minSeg) HError(2121,"HInit: Too Few Observation Sequences [%d]",nSeg); if (trace&T_TOP) { printf("%d Observation Sequences Loaded\n",nSeg); fflush(stdout); } EstimateModel(); SaveModel(outfn); if (trace&T_TOP) printf("Output written to directory %s\n", outDir==NULL?"current":outDir); return (0); /* never reached -- make compiler happy */ }
static int nWords;                      /* number of words in list */
static int nLModel;                     /* number of loaded LMs */
static LMInfo lmInfo[MAX_LMODEL];       /* array of loaded LMs */
static int numTests;                    /* number of tests to perform */
static int testInfo[MAX_TEST];          /* the array of test records */
static PStats sent;                     /* per utterance accumulators */
static PStats totl;                     /* global accumulator */
static LabId sstId = NULL;              /* sentence start marker */
static LabId senId = NULL;              /* sentence end marker */
static LabId unkId = NULL;              /* presumably the unknown-word marker; the previous
                                           comment repeated "sentence end marker" -- TODO confirm */
static LabId pLab[LBUF_SIZE];           /* label array */
static Boolean skipOOV = TRUE;          /* discard OOV in computation */
static Boolean printOOV = FALSE;        /* print unique OOVs and their frequencies */
static Boolean streamMode = FALSE;      /* stream mode */
static FileFormat lff = UNDEFF;         /* label file format */
static char *nulName = "???";           /* name of null class */
static LabId nulClass;                  /* Id of NULCLASS phone label */
static int unkEquiv = 0;                /* number of equivalent words outside the word list */
static NameId **l2nId;                  /* array of LabId -> NameId lookup tables */
static LabId *eqId;                     /* label equivalence lookup table */
static int cutOff[LM_NSIZE+1];          /* new cutoffs for COUNT-models */
static float wdThresh[LM_NSIZE+1];      /* new wdThresh for COUNT-models */
static char *outStreamFN = NULL;        /* presumably the output stream file name -- TODO confirm */
FILE *outStream;                        /* non-static: visible to other translation units */
MemHeap tempHeap;                       /* Stores data valid only for file */
MemHeap permHeap;                       /* Stores global stats */

/* ---------------- Configuration Parameters --------------------- */

static ConfParam *cParm[MAXGLOBS];
static int nParm = 0;                   /* total num params */

/* ---------------- Static function prototypes required ---------- */

static void Initialise(void);
static void ProcessFiles(void);
static void AddEquiv(char * cl, char * eq);

/* ---------------- Process Command Line ------------------------- */

/* SetConfParms: set conf parms relevant to this tool */
/* NOTE(review): the body of this definition is cut off in this view. */
void SetConfParms(void) { int i;
void main(int argc, char *argv[]) { FILE *fwav, *ppd, *ptm; int byte_swap = 0; int headersize, sampling_rate, min_pitch, max_pitch; int filesize, total = 0, min_amp = 0, max_amp = 0, ten_ms; int min_range, max_range, verbose = 0, low_range, high_range; short *speech; int i, j; float mean, global_rms = 0.0, sample_interval; float *rho, eng; int x_start, num_peaks; short *peak_set; int pitch[NUM_FRAMES][MAX_CANDS], frame_ct = 0, peak_ct[NUM_FRAMES]; float corr[NUM_FRAMES][MAX_CANDS]; int start_frame, end_frame, done, counter, at, at_bad; short possible[GROUP_SIZE][MAX_CANDS][MAX_CANDS]; int best_path[MAX_CANDS][GROUP_SIZE+1], cand_ct; float best_cost[MAX_CANDS], best_no_bad[MAX_CANDS], min, best, min_bad; int cur_end, at2; int target, index, keep_going, end_pt; int male; int num_bad, ct1, ct2, num_shared; float ave1, ave2; int targ_pitch[NUM_FRAMES], choice1[NUM_FRAMES], choice2[NUM_FRAMES]; int ave_pit_len, ave_ct, need_fix; char message[100]; float time,start_time,out_pitch; int mstime; int length,start,last_length,last_start; int len_stem; char *stem; int end_of_file; char *ptmfn,*ppdfn; #ifndef HTKCOMPILE char *stem_ptmfn,*stem_ppdfn; #endif #ifdef HTKCOMPILE char *s; DataFile src; /* src is then a DataFile type structure */ FileFormat ff; /* ff is then a FileFormat type */ char *wavfn; /* Waveform input file name */ char *tempptmfn=NULL; /* temporary storage for command line specified ptmfn */ char *tempppdfn=NULL; /* temporary storage for command line specified ppdfn */ min_pitch = DEFAULT_MIN_PITCH; max_pitch = DEFAULT_MAX_PITCH; #endif #ifndef HTKCOMPILE if (argc < 2) { ReportUsage(); exit(1); } if (Scan_flag(argc, argv, "-o") == -1) { fprintf(stderr,"\n\nsrcfn.ptm - contains time (in seconds) against pitch (in Hz)\nsrcfn.ppd - contains length of voiced pitch period (in samples) against\n start position of pitch period (in samples)\n\n"); exit(1); } if ((fwav = Std_fopen(argv[argc - 1], "r")) == NULL) { fprintf(stderr,"Cannot open input file %s\n",argv[argc - 
1]); exit(1); } /* gets the stem from the input filename and creates the output filenames from this, if not otherwise specified */ len_stem = strcspn(argv[argc - 1],"."); if ((stem = (char *)calloc((len_stem+1),sizeof(char))) == NULL) { fprintf(stderr,"Cannot create stem character array"); exit(1); } *strncpy(stem,argv[argc - 1],len_stem); if ((stem_ptmfn = (char *)calloc((len_stem+5),sizeof(char))) == NULL) { fprintf(stderr,"Cannot create stem_ptmfn character array"); exit(1); } if ((stem_ppdfn = (char *)calloc((len_stem+5),sizeof(char))) == NULL) { fprintf(stderr,"Cannot create stem_ppdfn character array"); exit(1); } sprintf(stem_ptmfn,"%s.ptm",stem); sprintf(stem_ppdfn,"%s.ppd",stem); ptmfn = Scan_string(argc, argv, "-t", stem_ptmfn); ppdfn = Scan_string(argc, argv, "-p", stem_ppdfn); if ((ptm = Std_fopen(ptmfn, "w")) == NULL) { fprintf(stderr,"Cannot open output file %s\n",ptmfn); exit(1); } if ((ppd = Std_fopen(ppdfn, "w")) == NULL) { fprintf(stderr,"Cannot open output file %s\n",ppdfn); exit(1); } headersize = Scan_int(argc, argv, "-h", DEFAULT_HEADER_SIZE); sampling_rate= Scan_int(argc, argv, "-s", DEFAULT_SAMPLING_RATE); min_pitch= Scan_int(argc, argv, "-n", DEFAULT_MIN_PITCH); max_pitch= Scan_int(argc, argv, "-x", DEFAULT_MAX_PITCH); byte_swap= Scan_flag(argc, argv, "-b"); /* print out the options being used */ fprintf(stderr,"Waveform input filename : %s\n",argv[argc - 1]); fprintf(stderr,"Pitch against time output file : %s\n",ptmfn); fprintf(stderr,"Pitch period output file : %s\n",ppdfn); fprintf(stderr,"Headersize : %d\n",headersize); fprintf(stderr,"Sampling_rate : %d\n",sampling_rate); fprintf(stderr,"Min_pitch : %d\n",min_pitch); fprintf(stderr,"Max_pitch : %d\n",max_pitch); if (byte_swap == -1) fprintf(stderr,"Byte swapping data\n"); for (i = 0; i < NUM_FRAMES; i++) targ_pitch[i] = choice1[i] = choice2[i] = 0; /* load in the speech file */ Panic_fseek(fwav, 0, 2); filesize = (ftell(fwav) - headersize) / sizeof(short); Panic_fseek(fwav, 
headersize, 0); speech = Panic_short_array(filesize); Panic_fread(speech, sizeof(*speech), filesize, fwav); /* byte swap if nesscessary */ if (byte_swap == -1) { for (i = 0; i < filesize; i++) Swap(&speech[i]); } #endif #ifdef HTKCOMPILE InitShell(argc,argv); InitMath(FALSE); if (NumArgs()==0) ReportUsage(); while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(1,"Bad switch %s; must be single letter",s); switch(s[0]){ case 'o': fprintf(stderr,"\n\nsrcfn.ptm - contains time (in seconds) against pitch (in Hz)\nsrcfn.ppd - contains length of voiced pitch period (in samples) against\n start position of pitch period (in samples)\n\n"); exit(1); break; case 'n': min_pitch = GetIntArg(); break; case 'x': max_pitch = GetIntArg(); break; case 't': tempptmfn = GetStrArg(); break; case 'p': tempppdfn = GetStrArg(); break; case 'F': if (NextArg() != STRINGARG) HError(1,"Data File format expected"); if((ff = Str2Format(GetStrArg())) == ALIEN) HError(0,"Warning ALIEN Data file format set"); SetFormat(ff); break; default: HError(1,"Unknown switch %s",s); } } if (NextArg()!=STRINGARG) HError(1,"Waveform file name expected"); wavfn = GetStrArg(); /* gets the stem from the input filename and creates the output filenames from this, if not otherwise specified */ len_stem = strcspn(wavfn,"."); if ((stem = (char *)calloc((len_stem+1),sizeof(char))) == NULL) { fprintf(stderr,"Cannot create stem character array"); exit(1); } *strncpy(stem,wavfn,len_stem); if (tempptmfn == NULL) { if ((ptmfn = (char *)calloc((len_stem+5),sizeof(char))) == NULL) { fprintf(stderr,"Cannot create ptmfn character array"); exit(1); } sprintf(ptmfn,"%s.ptm",stem); } else ptmfn = tempptmfn; if (tempppdfn == NULL) { if ((ppdfn = (char *)calloc((len_stem+5),sizeof(char))) == NULL) { fprintf(stderr,"Cannot create ppdfn character array"); exit(1); } sprintf(ppdfn,"%s.ppd",stem); } else ppdfn = tempppdfn; if ((ptm = fopen(ptmfn, "w")) == NULL) { fprintf(stderr,"Cannot open output file 
%s\n",ptmfn); exit(1); } if ((ppd = fopen(ppdfn, "w")) == NULL) { fprintf(stderr,"Cannot open output file %s\n",ppdfn); exit(1); } /* print out the options being used */ fprintf(stderr,"Waveform input filename : %s\n",wavfn); fprintf(stderr,"Pitch against time output file : %s\n",ptmfn); fprintf(stderr,"Pitch period output file : %s\n",ppdfn); fprintf(stderr,"Min_pitch : %d\n",min_pitch); fprintf(stderr,"Max_pitch : %d\n",max_pitch); SpOpen(wavfn,&src); if (src.sampKind != WAVEFORM) HError(99,"Waveform file expected"); sampling_rate = 10000000/src.sampPeriod; filesize = src.nSamples; for (i = 0; i < NUM_FRAMES; i++) targ_pitch[i] = choice1[i] = choice2[i] = 0; /* load in the speech file */ if ((speech = (short *)calloc(filesize,sizeof(short))) == NULL) { fprintf(stderr,"Cannot create speech array"); exit(1); } for (i = 0; i < filesize; i++) GetSample(&src,i,&speech[i]); #endif /* The rest is independent of the HTKCOMPILE flag */ for (i = 0; i < filesize; i++) total += speech[i]; mean = (total * 1.0) / filesize; for (i = 0; i < filesize; i++) { speech[i] -= mean; if (speech[i] > max_amp) max_amp = speech[i]; if (speech[i] < min_amp) min_amp = speech[i]; } for (i = 0; i < filesize; i++) global_rms += (speech[i] * speech[i]); global_rms = sqrt(global_rms/(filesize * 1.0)); sample_interval = 1.0 / (float) sampling_rate; ten_ms = (int) (1.0 * sampling_rate) / 100; min_range = (int) ((1.0/max_pitch)/sample_interval); max_range = (int) ((1.0/min_pitch)/sample_interval); if ((rho = (float *)calloc(max_range + 1,sizeof(float))) == NULL) { fprintf(stderr,"Cannot create rho array"); exit(1); } if ((peak_set = (short *)calloc(MAX_CANDS,sizeof(short))) == NULL) { fprintf(stderr,"Cannot create peak_set array"); exit(1); } x_start = 0; while (x_start + 2 * max_range < filesize) { for (i = 0; i <= max_range; i++) rho[i] = 0.0; eng = find_sq_eng(speech, x_start, max_range - min_range + 1); if (eng >= 0.05 * global_rms) { for (i = min_range; i <= max_range; i++) { rho[i] = 
calculate_rho_first(speech, i, x_start, min_range); } } num_peaks = identify_peak_candidates(rho, min_range, max_range, peak_set, x_start, verbose, speech, global_rms); if (num_peaks > MAX_CANDS) { sprintf(message, "MAX_CANDS not big enough for %d\n", num_peaks); help(message); } peak_ct[frame_ct] = num_peaks + 1; for (i = 0; i < num_peaks; i++) { pitch[frame_ct][i+1] = peak_set[i]; corr[frame_ct][i+1] = rho[peak_set[i]]; } frame_ct++; if (frame_ct >= NUM_FRAMES) { sprintf(message, "NUM_FRAMES not big enough for %d\n", frame_ct); help(message); } x_start += ten_ms; } /* The stuff in finp didnt seem to be very useful - so i got rid of that output file - R.E.D (23:8:93) fprintf(finp, "%d\n", frame_ct); for (i = 0; i < frame_ct; i++) { fprintf(finp, "%d ", peak_ct[i]); for (j = 1; j < peak_ct[i]; j++) fprintf(finp, "%d %f\n", pitch[i][j], corr[i][j]); } fclose(finp); */ /**** fscanf(finp, "%d", &frame_ct); for (i = 0; i < frame_ct; i++) { fscanf(finp, "%d", peak_ct + i); for (j = 1; j < peak_ct[i]; j++) fscanf(finp, "%d %f", &(pitch[i][j]), &(corr[i][j])); } ****/ done = 0; counter = 0; while (! done) { /* get a group */ while ((counter < frame_ct) && (peak_ct[counter] == 1)) counter++; if (counter >= frame_ct) done = 1; if (! 
done) { start_frame = counter; keep_going = 1; while (keep_going) { /* hack so we don't miss singulars */ while ((counter < frame_ct) && (peak_ct[counter] > 1)) counter++; if ((counter+1 < frame_ct) && (peak_ct[counter+1] > 1) && (counter+2 < frame_ct) && (peak_ct[counter+2] > 1)) counter = counter + 1; else { end_frame = counter-1; keep_going = 0; } } /* got a group */ if (end_frame > start_frame) { make_possible(start_frame, end_frame, pitch, possible, peak_ct, corr, frame_ct); cand_ct = 0; for (j = 1; j < peak_ct[start_frame]; j++) { warp(start_frame, end_frame, pitch, corr, peak_ct, j, possible, best_path, best_cost, &cand_ct, best_no_bad); } if (cand_ct >= MAX_CANDS) { sprintf(message, "cand_ct %d too big for MAX_CANDS", cand_ct); help(message); } /* Choosing which string to use */ min = min_bad = VBIG; for (i = 0; i < cand_ct; i++) { if (best_cost[i] < min) { min = best_cost[i]; at = i; } if (best_no_bad[i] < min_bad) { min_bad = best_no_bad[i]; at_bad = i; } } if (at == at_bad) { assign_targ(targ_pitch, start_frame, end_frame, best_path, at, pitch); } else { /* lets look at the options */ if ((num_shared = share(at, at_bad, pitch, start_frame, end_frame, best_path))) { num_bad = 0; for (i = start_frame; i <= end_frame; i++) if ((corr[i][best_path[at_bad][i-start_frame+1]] == PENALTY) && (pitch[i][best_path[at_bad][i-start_frame+1]] != pitch[i][best_path[at][i-start_frame+1]])) num_bad++; if ((end_frame-start_frame+1 == num_shared + num_bad) || (num_bad > round((end_frame-start_frame+1.0-num_shared)/2.0))) { /* keep the original */ assign_targ(targ_pitch, start_frame, end_frame, best_path, at, pitch); } else { /* figure out which one to use by calculating average over non-penalty portions */ ave1 = ave2 = 0.0; ct1 = ct2 = 0; for (i = start_frame; i <= end_frame; i++) { if (corr[i][best_path[at][i-start_frame+1]] != PENALTY) { ave1 += (1.0 - corr[i][best_path[at][i-start_frame+1]]); ct1++; } if (corr[i][best_path[at_bad][i-start_frame+1]] != PENALTY) { ave2 += 
(1.0 - corr[i][best_path[at_bad][i-start_frame+1]]); ct2++; } } ave1 /= ct1; ave2 /= ct2; if (ave1 < ave2) { /* use at */ assign_targ(targ_pitch, start_frame, end_frame, best_path, at, pitch); } else { /* use at_bad */ assign_targ(targ_pitch, start_frame, end_frame, best_path, at_bad, pitch); } } } else { /* they dont share a common path - so lets check if one is much better than the other, and if not, lets delay the choice until we have analysed the entire sentence */ ave1 = ave2 = 0.0; ct1 = ct2 = 0; for (i = start_frame; i <= end_frame; i++) { if (corr[i][best_path[at][i-start_frame+1]] != PENALTY) { ave1 += 1.0 - corr[i][best_path[at][i-start_frame+1]]; ct1++; } if (corr[i][best_path[at_bad][i-start_frame+1]] != PENALTY) { ave2 += 1.0 - corr[i][best_path[at_bad][i-start_frame+1]]; ct2++; } } ave1 /= ct1; ave2 /= ct2; if (ave1 < ave2 - 0.03) /* use at */ assign_targ(targ_pitch, start_frame, end_frame, best_path, at, pitch); else if (ave2 < ave1 - 0.03) /* use at_bad */ assign_targ(targ_pitch, start_frame, end_frame, best_path, at_bad, pitch); else { /* save these 2 choices for later resolution */ assign_targ(choice1, start_frame, end_frame, best_path, at, pitch); assign_targ(choice2, start_frame, end_frame, best_path, at_bad, pitch); } } } } } } /* now resolve any choices */ need_fix = 0; ave_pit_len = ave_ct = 0; for (i = 0; i < frame_ct; i++) { if (targ_pitch[i] > 0) { ave_pit_len += targ_pitch[i]; ave_ct++; } if (choice1[i] > 0) need_fix = 1; } if (ave_ct) ave_pit_len /= ave_ct; else { if (male) ave_pit_len = 125; /* SUSPECT HARD NUMBERING HERE **********/ else ave_pit_len = 71; /* But its not used i think - R.E.D */ printf("average pitch length being set to fixed number\nwhich i think ought to be scaled by the sampling frequency - but isnt. - R.E.D"); } if (need_fix) { done = counter = 0; while (! done) { while ((counter < frame_ct) && (choice1[counter] == 0)) counter++; if (counter >= frame_ct) done = 1; if (! 
done) { start_frame = counter; while ((counter < frame_ct) && (choice1[counter] > 0)) counter++; end_frame = counter-1; /* get the averages and choose the one closest to ave_pit_len */ ave1 = ave2 = 0.0; ct1 = 0; for (i = start_frame; i <= end_frame; i++) { ave1 += choice1[i]; ave2 += choice2[i]; ct1++; } ave1 /= ct1; ave2 /= ct1; if (absof(ave1 - ave_pit_len) < absof(ave2 - ave_pit_len)) { for (i = start_frame; i <= end_frame; i++) targ_pitch[i] = choice1[i]; } else { for (i = start_frame; i <= end_frame; i++) targ_pitch[i] = choice2[i]; } } } } done = counter = 0; while (! done) { while ((counter < frame_ct) && (targ_pitch[counter] == 0)) counter++; if (counter >= frame_ct) done = 1; if (! done) { start_frame = counter; while ((counter < frame_ct) && (targ_pitch[counter] > 0)) counter++; end_frame = counter-1; x_start = start_frame * ten_ms; low_range = (int) ((1.0-MAX_SEP) * targ_pitch[start_frame]); if (low_range < min_range) low_range = min_range; high_range = (int) ((1.0+MAX_SEP) * targ_pitch[start_frame]); if (high_range > max_range) high_range = max_range; cur_end = x_start; end_pt = (end_frame * ten_ms) + (2 * targ_pitch[end_frame]); while (cur_end < end_pt) { for (i = 0; i <= max_range; i++) rho[i] = 0.0; best = 0.0; for (i = low_range; i <= high_range; i++) { rho[i] = calculate_rho_first(speech, i, x_start, low_range); if (rho[i] > best) { best = rho[i]; at2 = i; } } fprintf(ppd, "%d %d\n", at2, x_start); x_start += at2; index = 1 + x_start/ten_ms; if (index < start_frame) index = start_frame; if (index > end_frame) index = end_frame; target = targ_pitch[index]; low_range = (int) ((1.0-MAX_SEP) * target); if (low_range < min_range) low_range = min_range; high_range = (int) ((1.0+MAX_SEP) * target); if (high_range > max_range) high_range = max_range; cur_end = x_start + at2; } } } fclose(ppd); /* now reopen the output file written so far for reading, and from this calculate a pitch file of time against pitch. 
Resolution = 1 ms R.E.D 24:8:93 */ if ((ppd = fopen(ppdfn, "r")) == NULL) { fprintf(stderr,"Cannot reopen output file %s\n",ppdfn); exit(1); } time = 0.0; mstime = 0; out_pitch = 0.0; end_of_file = 0; last_length = 0; last_start = 0; fscanf(ppd, "%d", &length); fscanf(ppd, "%d", &start); start_time = start*1.0/sampling_rate; while ( 1 == 1 ) { while (start_time < time) { last_length = length; last_start = start; if (fscanf(ppd, "%d", &length) == EOF) { end_of_file = 1; break; } fscanf(ppd, "%d", &start); start_time = start*1.0/sampling_rate; } if (end_of_file == 1) break; /* if voiced then interpolate for the pitch */ if (start == last_start + last_length) out_pitch = ( (sampling_rate*time - last_start)/((start-last_start)*length) +(start - sampling_rate*time)/((start-last_start)*last_length)) *sampling_rate; else out_pitch = 0.0; fprintf(ptm,"%f %f\n",time,out_pitch); mstime++; time = mstime/1000.0; } /* if there's an unvoiced/silent bit at the end then output zeros to the output file */ if (filesize > (start + 3*length) ) { while (sampling_rate*time < filesize) { fprintf(ptm,"%f %f\n",time,0.0); mstime++; time = mstime/1000.0; } } fclose(ppd); fclose(ptm); fclose(fwav); }
int main(int argc, char *argv[]) { char *datafn=NULL; char *datafn2=NULL; char *s; char *scriptFile; char datafn1[MAXSTRLEN]; char newFn[MAXSTRLEN]; FILE *f; UttInfo *utt; /* utterance information storage */ FBInfo *fbInfo; /* forward-backward information storage */ HMMSet hset; /* Set of HMMs to be re-estimated */ Source src; float tmpFlt; int tmpInt; int numUtt,spUtt=0; void Initialise(FBInfo *fbInfo, MemHeap *x, HMMSet *hset, char *hmmListFn); void DoForwardBackward(FBInfo *fbInfo, UttInfo *utt, char *datafn, char *datafn2); void UpdateModels(HMMSet *hset, ParmBuf pbuf2); void StatReport(HMMSet *hset); if(InitShell(argc,argv,herest_version,herest_vc_id)<SUCCESS) HError(2300,"HERest: InitShell failed"); InitMem(); InitMath(); InitSigP(); InitAudio(); InitWave(); InitVQ(); InitLabel(); InitModel(); if(InitParm()<SUCCESS) HError(2300,"HERest: InitParm failed"); InitTrain(); InitUtil(); InitFB(); InitAdapt(&xfInfo); InitMap(); if (!InfoPrinted() && NumArgs() == 0) ReportUsage(); if (NumArgs() == 0) Exit(0); al_hmmDir[0] = '\0'; al_hmmExt[0] = '\0'; al_hmmMMF[0] = '\0'; al_hmmLst[0] = '\0'; up_hmmMMF[0] = '\0'; CreateHeap(&hmmStack,"HmmStore", MSTAK, 1, 1.0, 50000, 500000); SetConfParms(); CreateHMMSet(&hset,&hmmStack,TRUE); CreateHeap(&uttStack, "uttStore", MSTAK, 1, 0.5, 100, 1000); utt = (UttInfo *) New(&uttStack, sizeof(UttInfo)); CreateHeap(&fbInfoStack, "FBInfoStore", MSTAK, 1, 0.5, 100 , 1000 ); fbInfo = (FBInfo *) New(&fbInfoStack, sizeof(FBInfo)); CreateHeap(&accStack, "accStore", MSTAK, 1, 1.0, 50000, 500000); while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(2319,"HERest: Bad switch %s; must be single letter",s); switch(s[0]){ case 'b': if (NextArg()!=STRINGARG) HError(2319,"HERest: script file expected"); scriptFile = GetStrArg(); break; case 'c': minFrwdP = GetChkedFlt(0.0,1000.0,s); break; case 'd': if (NextArg()!=STRINGARG) HError(2319,"HERest: HMM definition directory expected"); hmmDir = GetStrArg(); break; case 'm': minEgs = 
GetChkedInt(0,1000,s); break; case 'o': if (NextArg()!=STRINGARG) HError(2319,"HERest: HMM file extension expected"); newExt = GetStrArg(); break; case 'p': parMode = GetChkedInt(0,500,s); break; case 'r': twoDataFiles = TRUE; break; case 's': stats = TRUE; if (NextArg()!=STRINGARG) HError(2319,"HERest: Stats file name expected"); statFN = GetStrArg(); break; case 't': pruneInit = GetChkedFlt(0.0,1.0E20,s); if (NextArg()==FLOATARG || NextArg()==INTARG) { pruneInc = GetChkedFlt(0.0,1.0E20,s); pruneLim = GetChkedFlt(0.0,1.0E20,s); } else { pruneInc = 0.0; pruneLim = pruneInit ; } break; case 'u': SetuFlags(); break; case 'v': minVar = GetChkedFlt(0.0,10.0,s); break; case 'w': mixWeightFloor = MINMIX * GetChkedFlt(0.0,10000.0,s); break; case 'x': if (NextArg()!=STRINGARG) HError(2319,"HERest: HMM file extension expected"); hmmExt = GetStrArg(); break; case 'B': saveBinary=TRUE; break; case 'F': if (NextArg() != STRINGARG) HError(2319,"HERest: Data File format expected"); if((dff = Str2Format(GetStrArg())) == ALIEN) HError(-2389,"HERest: Warning ALIEN Data file format set"); break; case 'G': if (NextArg() != STRINGARG) HError(2319,"HERest: Label File format expected"); if((lff = Str2Format(GetStrArg())) == ALIEN) HError(-2389,"HERest: Warning ALIEN Label file format set"); break; case 'H': if (NextArg() != STRINGARG) HError(2319,"HERest: HMM macro file name expected"); strcpy(up_hmmMMF,GetStrArg()); AddMMF(&hset,up_hmmMMF); break; case 'I': if (NextArg() != STRINGARG) HError(2319,"HERest: MLF file name expected"); LoadMasterFile(GetStrArg()); break; case 'L': if (NextArg()!=STRINGARG) HError(2319,"HERest: Label file directory expected"); labDir = GetStrArg(); break; case 'M': if (NextArg()!=STRINGARG) HError(2319,"HERest: Output macro file directory expected"); newDir = GetStrArg(); break; case 'T': trace = GetChkedInt(0,0100000,s); break; case 'X': if (NextArg()!=STRINGARG) HError(2319,"HERest: Label file extension expected"); labExt = GetStrArg(); break; /* 
additional options for transform support */ case 'a': xfInfo.useInXForm = TRUE; break; case 'h': if (NextArg()!=STRINGARG) HError(1,"Speaker name pattern expected"); xfInfo.outSpkrPat = GetStrArg(); break; case 'l': maxSpUtt = GetChkedInt(0,0100000,s); break; case 'E': if (NextArg()!=STRINGARG) HError(2319,"HERest: parent transform directory expected"); xfInfo.usePaXForm = TRUE; xfInfo.paXFormDir = GetStrArg(); if (NextArg()==STRINGARG) xfInfo.paXFormExt = GetStrArg(); if (NextArg() != SWITCHARG) HError(2319,"HERest: cannot have -E as the last option"); break; case 'J': if (NextArg()!=STRINGARG) HError(2319,"HERest: input transform directory expected"); AddInXFormDir(&hset,GetStrArg()); if (NextArg()==STRINGARG) { if (xfInfo.inXFormExt == NULL) xfInfo.inXFormExt = GetStrArg(); else HError(2319,"HERest: only one input transform extension may be specified"); } if (NextArg() != SWITCHARG) HError(2319,"HERest: cannot have -J as the last option"); break; case 'K': if (NextArg()!=STRINGARG) HError(2319,"HERest: output transform directory expected"); xfInfo.outXFormDir = GetStrArg(); if (NextArg()==STRINGARG) xfInfo.outXFormExt = GetStrArg(); if (NextArg() != SWITCHARG) HError(2319,"HERest: cannot have -K as the last option"); break; case 'z': if (NextArg() != STRINGARG) HError(2319,"HERest: output TMF file expected"); xfInfo.xformTMF = GetStrArg(); break; default: HError(2319,"HERest: Unknown switch %s",s); } } if (NextArg() != STRINGARG) HError(2319,"HERest: file name of vocabulary list expected"); Initialise(fbInfo, &fbInfoStack, &hset, GetStrArg()); InitUttInfo(utt, twoDataFiles); numUtt = 1; if (trace&T_TOP) SetTraceFB(); /* allows HFB to do top-level tracing */ do { if (NextArg()!=STRINGARG) HError(2319,"HERest: data file name expected"); if (twoDataFiles && (parMode!=0)){ if ((NumArgs() % 2) != 0) HError(2319,"HERest: Must be even num of training files for single pass training"); strcpy(datafn1,GetStrArg()); datafn = datafn1; datafn2 = GetStrArg(); }else datafn = 
GetStrArg(); if (parMode==0){ src=LoadAccs(&hset, datafn,uFlags); ReadFloat(&src,&tmpFlt,1,ldBinary); totalPr += (LogDouble)tmpFlt; ReadInt(&src,&tmpInt,1,ldBinary); totalT += tmpInt; CloseSource( &src ); } else { /* track speakers */ if (UpdateSpkrStats(&hset,&xfInfo, datafn)) spUtt=0; /* Check to see whether set-up is valid */ CheckUpdateSetUp(); fbInfo->inXForm = xfInfo.inXForm; fbInfo->al_inXForm = xfInfo.al_inXForm; fbInfo->paXForm = xfInfo.paXForm; if ((maxSpUtt==0) || (spUtt<maxSpUtt)) DoForwardBackward(fbInfo, utt, datafn, datafn2) ; numUtt += 1; spUtt++; } } while (NumArgs()>0); if (uFlags&UPXFORM) {/* ensure final speaker correctly handled */ UpdateSpkrStats(&hset,&xfInfo, NULL); if (trace&T_TOP) { printf("Reestimation complete - average log prob per frame = %e (%d frames)\n", totalPr/totalT, totalT); } } else { if (parMode>0 || (parMode==0 && (updateMode&UPMODE_DUMP))){ MakeFN("HER$.acc",newDir,NULL,newFn); f=DumpAccs(&hset,newFn,uFlags,parMode); tmpFlt = (float)totalPr; WriteFloat(f,&tmpFlt,1,ldBinary); WriteInt(f,(int*)&totalT,1,ldBinary); fclose( f ); } if (parMode <= 0) { if (stats) { StatReport(&hset); } if (updateMode&UPMODE_UPDATE) UpdateModels(&hset,utt->pbuf2); } } ResetHeap(&uttStack); ResetHeap(&fbInfoStack); ResetHeap(&hmmStack); ResetHeap(&accStack); Exit(0); return (0); /* never reached -- make compiler happy */ }
int main(int argc, char *argv[]) { int i; char *s,*c; char fmt[256]; InitShell(argc,argv,lnorm_version,lnorm_vc_id); InitMem(); InitMath(); InitWave(); InitLabel(); InitWMap(); InitLUtil(); InitLModel(); InitPCalc(); if (!InfoPrinted() && NumArgs() == 0) ReportUsage(); if (NumArgs() == 0) Exit(EXIT_SUCCESS); SetConfParms(); CreateHeap(&langHeap,"langHeap",MSTAK,1,0.5,5000,40000); for (i=1; i<=LM_NSIZE; i++) cutOff[i] = 0, wdThresh[i] = 0.0; while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(16519,"Bad switch %s; must be single letter",s); switch(s[0]){ case 'c': i = GetChkedInt(2,LM_NSIZE,s); cutOff[i] = GetChkedInt(1,1000,s); break; case 'd': i = GetChkedInt(2,LM_NSIZE,s); wdThresh[i] = GetChkedFlt(0.0,1E10,s); break; case 'f': strcpy(fmt,GetStrArg()); for (c=fmt; *c!=0; *c=toupper(*c),c++); if (strcmp(fmt, LM_TXT_TEXT)==0) saveFmt = LMF_TEXT; else if (strcmp(fmt, LM_TXT_BINARY)==0) saveFmt = LMF_BINARY; else if (strcmp(fmt, LM_TXT_ULTRA)==0) saveFmt = LMF_ULTRA; else HError(16519,"Unrecognised LM format, should be one of [%s, %s, %s]", LM_TXT_TEXT, LM_TXT_BINARY, LM_TXT_ULTRA); break; case 'n': nSize = GetChkedInt(1,LM_NSIZE,s); break; case 'w': if (NextArg() != STRINGARG) HError(16519,"LPlex: Word list file name expected"); wlistFN = GetStrArg(); break; case 'T': trace = GetChkedInt(0,077, s); break; default: HError(16519,"LMPlex: Unknown switch %s",s); } } if (NextArg()!=STRINGARG) /* load the language model */ HError(16519, "Input language model filename expected"); srcFN = GetStrArg(); if (NextArg()!=STRINGARG) /* load the language model */ HError(16519, "Output language model filename expected"); tgtFN= GetStrArg(); if (wlistFN!=NULL) { CreateWordList(wlistFN,&wlist,10); lm = LoadLangModel(srcFN,&wlist,1.0,LMP_FLOAT|LMP_COUNT,&langHeap); } else { lm = LoadLangModel(srcFN,NULL,1.0,LMP_FLOAT|LMP_COUNT,&langHeap); } if (lm->probType==LMP_COUNT) { RebuildLM(lm,cutOff,wdThresh,LMP_FLOAT); } else { NormaliseLM(lm); } if (nSize>0 && 
nSize<lm->nSize) lm->nSize = nSize; for (i=1;i<=lm->nSize;i++) lm->gInfo[i].fmt = (i==1) ? LMF_TEXT : saveFmt; SaveLangModel(tgtFN,lm); Exit(EXIT_SUCCESS); return EXIT_SUCCESS; /* never reached -- make compiler happy */ }
int main(int argc, char *argv[]) { char *s,*lfn,*dfn; void GenSentences(char *latfn, char *dicfn); if(InitShell(argc,argv,hsgen_version,hsgen_vc_id)<SUCCESS) HError(3400,"HSGen: InitShell failed"); InitMem(); InitLabel(); InitMath(); InitSigP(); InitWave(); InitAudio(); InitVQ(); InitModel(); if(InitParm()<SUCCESS) HError(3200,"HSGen: InitParm failed"); InitDict(); InitNet(); if (!InfoPrinted() && NumArgs() == 0) ReportUsage(); if (NumArgs() == 0) Exit(0); SetConfParms(); while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(3419, "Bad switch %s; must be single letter", s); switch(s[0]){ case 's': stats = TRUE; break; case 'l': lnum = TRUE; break; case 'n': ngen = GetChkedInt(1,1000000,s); break; case 'q': quiet = TRUE; break; case 'T': trace = GetChkedInt(0,07,s); break; default: HError(3419, "Unknown switch %s", s); } } if (NextArg()!=STRINGARG) HError(3419, "lattice file name expected"); lfn = GetStrArg(); if (NextArg()!=STRINGARG) HError(3419, "dictionary file name expected"); dfn = GetStrArg(); GenSentences(lfn,dfn); if(InitShell(argc,argv,hsgen_version,hsgen_vc_id)<SUCCESS) HError(3400,"HSGen: InitShell failed"); ResetNet(); ResetDict(); ResetParm(); ResetModel(); ResetVQ(); ResetAudio(); ResetWave(); ResetSigP(); ResetMath(); ResetLabel(); ResetMem(); ResetShell(); Exit(0); return (0); /* never reached -- make compiler happy */ }
strcpy(vName,"varFloor"); strcat(vName,num); fprintf(f,"~v %s\n",vName); if (fullcNeeded[s]) TriDiag2Vector(accs[s].squareSum.inv,v); else CopyVector(accs[s].fixed.var,v); for (i=1; i<=hset.swidth[s]; i++) v[i] *= vFloorScale; fprintf(f,"<Variance> %d\n",hset.swidth[s]); WriteVector(f,v,FALSE); FreeVector(&gstack,v); } fclose(f); if (trace&T_TOP) printf("Var floor macros output to file %s\n",outfn); } /* ---------------- Load Data and Accumulate Stats --------------- */ /* AccVar: update global accumulators with given observation. Increments totalCount, then for every stream s in 1..hset.swidth[0] adds each component of obs.fv[s] to accs[s].meanSum and its square to the second-order accumulator selected by fullcNeeded[s]. */ void AccVar(Observation obs) { int x,y,s,V; float val; Vector v; totalCount++; for (s=1; s<=hset.swidth[0]; s++){ v = obs.fv[s]; V = hset.swidth[s]; for (x=1;x<=V;x++) { val=v[x]; accs[s].meanSum[x] += val; /* accumulate mean */ if (fullcNeeded[s]) { /* accumulate covar: diagonal term first, then the y<x entries of row x */ accs[s].squareSum.inv[x][x] += val*val; for (y=1;y<x;y++) accs[s].squareSum.inv[x][y] += val*v[y]; } else /* accumulate var */ accs[s].squareSum.var[x] += val*val; } } } /* CheckData: check data file consistent with HMM definition. Raises HError 2050 when info.tgtVecSize differs from hset.vecSize or when info.tgtPK differs from hset.pkind; fn is used only in the error messages. */ void CheckData(char *fn, BufferInfo info) { if (info.tgtVecSize!=hset.vecSize) HError(2050,"CheckData: Vector size in %s[%d] is incompatible with hmm %s[%d]", fn,info.tgtVecSize,hmmfn,hset.vecSize); if (info.tgtPK != hset.pkind) HError(2050,"CheckData: Parameterisation in %s is incompatible with hmm %s", fn,hmmfn); } /* LoadFile: load whole file or segments and accumulate variance */ void LoadFile(char *fn) { ParmBuf pbuf; BufferInfo info; char labfn[80]; Transcription *trans; long segStIdx,segEnIdx; int i,j,ncas,nObs; LLink p; if (segId == NULL) { /* load whole parameter file */ if((pbuf=OpenBuffer(&iStack, fn, 0, dff, FALSE_dup, FALSE_dup))==NULL) HError(2050,"LoadFile: Config parameters invalid"); GetBufferInfo(pbuf,&info); CheckData(fn,info); nObs = ObsInBuffer(pbuf); for (i=0; i<nObs; i++){ ReadAsTable(pbuf,i,&obs); AccVar(obs); } if (trace&T_LOAD) { printf(" %d observations loaded from %s\n",nObs,fn); fflush(stdout); } 
CloseBuffer(pbuf); } else { /* load segment of parameter file */ MakeFN(fn,labDir,labExt,labfn); trans = LOpen(&iStack,labfn,lff); ncas = NumCases(trans->head,segId); if ( ncas > 0) { if((pbuf=OpenBuffer(&iStack, fn, 0, dff, FALSE_dup, FALSE_dup))==NULL)
int main(int argc, char *argv[]) { char *s,*fn; cst_voice *voice; // synthesis voice cst_utterance *utt; // current utterance cst_wave *cstwave; // synthesised wave Wave w; // HTK wave short *p; HTime sampPeriod = 625.0; int n; MemHeap mem; AudioOut a; try { if (InitHTK(argc,argv,version)<SUCCESS){ ReportErrors("Main",0); exit(-1); } if (NumArgs() !=2) { printf("SFliteTest synthstring file\n"); exit(0); } CreateHeap(&mem,"heap",MSTAK,1,0.0,10000,100000); s = GetStrArg(); fn = GetStrArg(); printf("Synth: %s -> %s\n",s,fn); // initialise Edinburgh cst lib cst_regex_init(); // setup the voice voice = register_cmu_us_kal16(NULL); // convert text to waveform utt = flite_synth_text(s,voice); if (utt==NULL) { HRError(12001,"SFliteTest: cant synthesise %s\n",s); throw ATK_Error(12001); } cstwave = utt_wave(utt); p = cstwave->samples; n = cstwave->num_samples; w = OpenWaveOutput(&mem,&sampPeriod,n); printf("%d samples created\n",n); PutWaveSample(w,n,p); if (CloseWaveOutput(w,WAV,fn)<SUCCESS){ ReportErrors("Main",0); exit(-1); } // explore structure const cst_item *it, *itlast = NULL; float x,y; int i; string lastword="0"; x = 0; for (i=1,it = relation_head(utt_relation(utt, "Segment")); it!=NULL; it = item_next(it),i++) { printf("Segment %d\n",i); y = item_feat_float(it,"end"); string ph = string(ffeature_string(it,"p.name")); string wd = string(ffeature_string(it,"R:SylStructure.parent.parent.name")); //printf("end = %f ph=%s wd=%s\n",y,ph.c_str(),wd.c_str()); if (wd != lastword){ printf("**** end of %s = %f\n",lastword.c_str(),x); lastword=wd; } x = y; } //if (itlast!=NULL) { // word = string(ffeature_string(itlast,"R:SylStructure.parent.parent.name")); // idx = text.find(word); //} return 0; } catch (ATK_Error e){ ReportErrors("ATK",e.i); } catch (HTK_Error e){ ReportErrors("HTK",e.i); } return 0; }
int main(int argc, char *argv[]) { char *wordListFn,*latFn,*ipFn=NULL; LModel *bigramLm; BuildType bType = unknown; Boolean saveLatBin = FALSE; LatFormat format = HLAT_LMLIKE; Lattice *lat,*ipLat; Vocab voc; char *s; Lattice *ProcessWordLoop(MemHeap *latHeap, Vocab *voc); Lattice *ProcessBiGram(MemHeap *latHeap, Vocab *voc, LModel *biLM); void SaveLattice(Lattice *lat, char *latFn, LatFormat format); Lattice *LoadLattice(MemHeap *latHeap, char *latFn, Vocab *voc, Boolean shortArc); Lattice *ProcessWordPair(MemHeap *latHeap, Vocab *voc, char *fn); if(InitShell(argc,argv,hbuild_version,hbuild_vc_id)<SUCCESS) HError(3000,"HBuild: InitShell failed"); InitMem(); InitLabel(); InitMath(); InitDict(); InitNet(); InitLM(); CreateHeap(&buildStack, "HBuild Stack", MSTAK, 1, 0.0, 100000, LONG_MAX ); if (!InfoPrinted() && NumArgs() == 0) ReportUsage(); if (NumArgs() == 0) Exit(0); SetConfParms(); enterId=GetLabId("!ENTER",TRUE); /* All sentences should or are coerced */ exitId=GetLabId("!EXIT",TRUE); /* to start enterId and end exitId */ unknownId=GetLabId("!NULL",TRUE); /* Name for words not in list */ while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(3019,"HBuild: Bad switch %s; must be single letter",s); switch(s[0]){ case 'b': saveLatBin = TRUE; break; case 'm': if (bType != unknown) HError(3019,"HBuild: Can only specifiy one of -m, -n, -w, -x"); bType = matBiGram; if (NextArg()!=STRINGARG) HError(3019,"HBuild: Matrix Bigram file name expected"); ipFn = GetStrArg(); break; case 'n': if (bType != unknown) HError(3019,"HBuild: Can only specifiy one of -m, -n, -w, -x"); bType = boBiGram; if (NextArg()!=STRINGARG) HError(3019,"HBuild: Back-off Bigram file name expected"); ipFn = GetStrArg(); break; case 's': if (NextArg() != STRINGARG) HError(3019,"HBuild: Bigram ENTER label name expected"); enterId=GetLabId(GetStrArg(),TRUE); if (NextArg() != STRINGARG) HError(3019,"HBuild: Bigram EXIT label name expected"); exitId=GetLabId(GetStrArg(),TRUE); break; 
case 't': if (NextArg() != STRINGARG) HError(3019,"HBuild: Bracket start label name expected"); bStartId=GetLabId(GetStrArg(),TRUE); if (NextArg() != STRINGARG) HError(3019,"HBuild: Bracket end label name expected"); bEndId=GetLabId(GetStrArg(),TRUE); break; case 'u': if (NextArg() != STRINGARG) HError(3019,"HBuild: Unknown label name expected"); unknownId=GetLabId(GetStrArg(),TRUE); break; case 'w': if (bType != unknown) HError(3019,"HBuild: Can only specifiy one of -m, -n, -w, -x"); bType = wordPair; if (NextArg()!=STRINGARG) HError(3019,"HBuild: Word pair grammar file name expected"); ipFn = GetStrArg(); break; case 'x': if (bType != unknown) HError(3019,"HBuild: Can only specifiy one of -m, -n, -w, -x"); bType = multiLat; if (NextArg()!=STRINGARG) HError(3019,"HBuild: Multi-level lattice file name expected"); ipFn = GetStrArg(); break; case 'z': zapUnknown = TRUE; break; case 'T': trace = GetChkedInt(0,511,s); break; default: HError(3019,"HBuild: Unknown switch %s",s); } } if (NextArg()!=STRINGARG) HError(3019,"HBuild: Word List file name expected"); wordListFn = GetStrArg(); if (NextArg()!=STRINGARG) HError(3019,"HBuild: output lattice file name expected"); latFn = GetStrArg(); if (bType == unknown) bType = wordLoop; if (saveLatBin) format |= HLAT_LBIN; /* Read the word-list into a Vocab data structure */ InitVocab(&voc); if(ReadDict(wordListFn, &voc)<SUCCESS) HError(3013,"HBuild: ReadDict failed"); switch (bType) { case matBiGram: if (trace & T_TOP) printf("Reading bigram from file %s\n",ipFn); bigramLm = ReadLModel(&gstack, ipFn); if (bigramLm->type != matBigram) HError(3030,"HBuild: File specified is not a matrix bigram"); lat = ProcessBiGram(&gstack,&voc,bigramLm); SaveLattice(lat,latFn,format); break; case boBiGram: if (trace & T_TOP) printf("Reading bigram from file %s\n",ipFn); bigramLm = ReadLModel(&gstack, ipFn); if (bigramLm->type != boNGram) HError(3030,"HBuild: File specified is not a back-off bigram"); lat = ProcessBiGram(&gstack,&voc,bigramLm); 
SaveLattice(lat,latFn,format); break; case multiLat: if (trace & T_TOP) printf("Reading input lattice from file %s\n",ipFn); ipLat = LoadLattice(&buildStack,ipFn,&voc,FALSE); if (ipLat->subList!=NULL) { if (trace & T_TOP) printf("Expanding multi-level lattice\n"); lat = ExpandMultiLevelLattice(&buildStack,ipLat,&voc); } else lat = ipLat; SaveLattice(lat,latFn,format); break; case wordLoop: if (trace & T_TOP) printf("Building word loop\n"); lat = ProcessWordLoop(&gstack,&voc); SaveLattice(lat,latFn,format); break; case wordPair: lat = ProcessWordPair(&gstack,&voc,ipFn); SaveLattice(lat,latFn,format); break; default: HError(3001,"Only Bigram LMs / multiLats currently implemented"); } ResetLM(); ResetNet(); ResetDict(); ResetMath(); ResetLabel(); ResetMem(); ResetShell(); Exit(0); return (0); /* never reached -- make compiler happy */ }
/* DoRecognition: use single network to recognise each input utterance */ void DoRecognition(void) { FILE *nf; Network *net; Boolean isPipe; int n=0; AdaptXForm *incXForm; if ( (nf = FOpen(wdNetFn,NetFilter,&isPipe)) == NULL) HError(3210,"DoRecognition: Cannot open Word Net file %s",wdNetFn); if((wdNet = ReadLattice(nf,&ansHeap,&vocab,TRUE,FALSE))==NULL) HError(3210,"DoAlignment: ReadLattice failed"); FClose(nf,isPipe); if (trace&T_TOP) { printf("Read lattice with %d nodes / %d arcs\n",wdNet->nn,wdNet->na); fflush(stdout); } CreateHeap(&netHeap,"Net heap",MSTAK,1,0, wdNet->na*sizeof(NetLink),wdNet->na*sizeof(NetLink)); net = ExpandWordNet(&netHeap,wdNet,&vocab,&hset); ResetHeap(&ansHeap); if (trace&T_TOP) { printf("Created network with %d nodes / %d links\n", net->numNode,net->numLink); fflush(stdout); } if (trace & T_MEM){ printf("Memory State Before Recognition\n"); PrintAllHeapStats(); } if (NumArgs()==0) { /* Process audio */ while(TRUE){ printf("\nREADY[%d]>\n",++n); fflush(stdout); /* no input transform possible for audio input .... */ ProcessFile(NULL,net,n,genBeam, FALSE); if (update > 0 && n%update == 0) { if (trace&T_TOP) { printf("Transforming model set\n"); fflush(stdout); } /* at every stage a new transform is created - fix?? 
Estimate transform and then set it up as the input XForm */ incXForm = CreateAdaptXForm(&hset,"inc"); TidyBaseAccs(); GenAdaptXForm(&hset,incXForm); xfInfo.inXForm = GetMLLRDiagCov(incXForm);; SetXForm(&hset,xfInfo.inXForm); ApplyHMMSetXForm(&hset,xfInfo.inXForm); } } } else { /* Process files */ while (NumArgs()>0) { if (NextArg()!=STRINGARG) HError(3219,"DoRecognition: Data file name expected"); datFN = GetStrArg(); if (trace&T_TOP) { printf("File: %s\n",datFN); fflush(stdout); } /* This handles the initial input transform, parent transform setting and output transform creation */ if (UpdateSpkrStats(&hset, &xfInfo, datFN) && (!(xfInfo.useInXForm)) && (hset.semiTied == NULL)) { xfInfo.inXForm = NULL; } ProcessFile(datFN,net,n++,genBeam,FALSE); if (update > 0 && n%update == 0) { if (trace&T_TOP) { printf("Transforming model set\n"); fflush(stdout); } /* at every stage a new transform is created - fix?? Estimate transform and then set it up as the input XForm */ incXForm = CreateAdaptXForm(&hset,"inc"); TidyBaseAccs(); GenAdaptXForm(&hset,incXForm); xfInfo.inXForm = GetMLLRDiagCov(incXForm);; SetXForm(&hset,xfInfo.inXForm); ApplyHMMSetXForm(&hset,xfInfo.inXForm); } } } }
/* DoAlignment: by creating network from transcriptions or lattices */ void DoAlignment(void) { FILE *nf; char lfn[MAXSTRLEN], buf[MAXSTRLEN]; Transcription *trans; Network *net; Boolean isPipe; int n=0; LogDouble currGenBeam; AdaptXForm *incXForm; if (trace&T_TOP) { if (loadNetworks) printf("New network will be used for each file\n"); else printf("Label file will be used to align each file\n"); fflush(stdout); } CreateHeap(&netHeap,"Net heap",MSTAK,1,0,8000,80000); while (NumArgs()>0) { if (NextArg() != STRINGARG) HError(3219,"DoAlignment: Data file name expected"); datFN = GetStrArg(); if (trace&T_TOP) { printf("Aligning File: %s\n",datFN); fflush(stdout); } if (labFileMask != NULL ) { /* support for rescoring lattice masks */ if (!MaskMatch(labFileMask,buf,datFN)) HError(2319,"DoAlignment: mask %s has no match with segemnt %s",labFileMask,datFN); MakeFN(buf,labInDir,labInExt,lfn); } else { MakeFN(datFN,labInDir,labInExt,lfn); } if (loadNetworks) { if ( (nf = FOpen(lfn,NetFilter,&isPipe)) == NULL) HError(3210,"DoAlignment: Cannot open Word Net file %s",lfn); if((wdNet = ReadLattice(nf,&netHeap,&vocab,TRUE,FALSE))==NULL) HError(3210,"DoAlignment: ReadLattice failed"); FClose(nf,isPipe); if (trace&T_TOP) { printf("Read lattice with %d nodes / %d arcs\n", wdNet->nn,wdNet->na); fflush(stdout); } } else { LabList *ll = NULL; trans=LOpen(&netHeap,lfn,ifmt); if (trans->numLists >= 1) ll = GetLabelList(trans,1); if (!ll && !bndId) HError(3233, "DoAlignment: cannot align empty transcription"); wdNet=LatticeFromLabels(ll, bndId, &vocab,&netHeap); if (trace&T_TOP) { printf("Created lattice with %d nodes / %d arcs from label file\n", wdNet->nn,wdNet->na); fflush(stdout); } } net=ExpandWordNet(&netHeap,wdNet,&vocab,&hset); ++n; currGenBeam = genBeam; /* This handles the initial input transform, parent transform setting and output transform creation */ if (UpdateSpkrStats(&hset, &xfInfo, datFN) && (!(xfInfo.useInXForm)) && (hset.semiTied == NULL)) { xfInfo.inXForm = NULL; } if 
(genBeamInc == 0.0) ProcessFile (datFN, net, n, currGenBeam, FALSE); else { Boolean completed; completed = ProcessFile (datFN, net, n, currGenBeam, TRUE); currGenBeam += genBeamInc; while (!completed && (currGenBeam <= genBeamLim - genBeamInc)) { completed = ProcessFile (datFN, net, n, currGenBeam, TRUE); currGenBeam += genBeamInc; } if (!completed) ProcessFile (datFN, net, n, currGenBeam, FALSE); } if (update > 0 && n%update == 0) { if (trace&T_TOP) { printf("Transforming model set\n"); fflush(stdout); } /* at every stage a new transform is created - fix?? Estimate transform and then set it up as the input XForm */ incXForm = CreateAdaptXForm(&hset,"inc"); TidyBaseAccs(); GenAdaptXForm(&hset,incXForm); xfInfo.inXForm = GetMLLRDiagCov(incXForm);; SetXForm(&hset,xfInfo.inXForm); ApplyHMMSetXForm(&hset,xfInfo.inXForm); } ResetHeap(&netHeap); } }
int main(int argc, char *argv[]) { char *s; void Initialise(void); void DoRecognition(void); void DoAlignment(void); if(InitShell(argc,argv,hvite_version,hvite_vc_id)<SUCCESS) HError(3200,"HVite: InitShell failed"); InitMem(); InitLabel(); InitMath(); InitSigP(); InitWave(); InitAudio(); InitVQ(); InitModel(); if(InitParm()<SUCCESS) HError(3200,"HVite: InitParm failed"); InitDict(); InitNet(); InitRec(); InitUtil(); InitAdapt(&xfInfo); InitMap(); if (!InfoPrinted() && NumArgs() == 0) ReportUsage(); if (NumArgs() == 0) Exit(0); SetConfParms(); CreateHeap(&modelHeap, "Model heap", MSTAK, 1, 0.0, 100000, 800000 ); CreateHMMSet(&hset,&modelHeap,TRUE); while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(3219,"HVite: Bad switch %s; must be single letter",s); switch(s[0]){ case 'a': loadLabels=TRUE; break; case 'b': if (NextArg()!=STRINGARG) HError(3219,"HVite: Utterance boundary word expected"); bndId = GetLabId(GetStrArg(),TRUE); break; case 'c': tmBeam = GetChkedFlt(0.0,1000.0,s); break; case 'd': if (NextArg()!=STRINGARG) HError(3219,"HVite: HMM definition directory expected"); hmmDir = GetStrArg(); break; case 'e': saveAudioOut=TRUE; break; case 'f': states=TRUE; break; case 'g': replay=TRUE; break; case 'i': if (NextArg()!=STRINGARG) HError(3219,"HVite: Output MLF file name expected"); /* if(SaveToMasterfile(GetStrArg())<SUCCESS) HError(3214,"HCopy: Cannot write to MLF"); */ SaveToMasterfile(GetStrArg()); break; case 'k': xfInfo.useInXForm = TRUE; break; case 'j': if (NextArg()!=INTARG) HError(3219,"HVite: No. 
of files per online adaptation step expected"); update = GetChkedInt(1,256,s); break; case 'l': if (NextArg()!=STRINGARG) HError(3219,"HVite: Label file directory expected"); labDir = GetStrArg(); break; case 'm': models=TRUE; break; case 'n': nToks = GetChkedInt(2,MAX_TOKS,s); if (NextArg()==FLOATARG || NextArg()==INTARG) nTrans = GetChkedInt(1,10000,s); else nTrans = 1; break; case 'o': if (NextArg()!=STRINGARG) HError(3219,"HVite: Output label format expected"); labForm = GetStrArg(); break; case 'p': wordPen = GetChkedFlt(-1000.0,1000.0,s); break; case 'q': if (NextArg()!=STRINGARG) HError(3219,"HVite: Output lattice format expected"); latForm = GetStrArg(); break; case 'r': prScale = GetChkedFlt(0.0,1000.0,s); break; case 's': lmScale = GetChkedFlt(0.0,1000.0,s); break; case 't': genBeam = GetChkedFlt(0,1.0E20,s); if (genBeam == 0.0) genBeam = -LZERO; if (NextArg()==FLOATARG || NextArg()==INTARG) { genBeamInc = GetChkedFlt(0.0,1.0E20,s); genBeamLim = GetChkedFlt(0.0,1.0E20,s); if (genBeamLim < (genBeam + genBeamInc)) { genBeamLim = genBeam; genBeamInc = 0.0; } } else { genBeamInc = 0.0; genBeamLim = genBeam; } break; case 'w': if (NextArg()!=STRINGARG) loadNetworks=TRUE; else { wdNetFn = GetStrArg(); if (strlen(wdNetFn)==0) { wdNetFn=NULL; loadNetworks=TRUE; } } break; case 'u': maxActive = GetChkedInt(0,100000,s); break; case 'v': wordBeam = GetChkedFlt(0,1.0E20,s); if (wordBeam == 0.0) wordBeam = -LZERO; break; case 'x': if (NextArg()!=STRINGARG) HError(3219,"HVite: HMM file extension expected"); hmmExt = GetStrArg(); break; case 'y': if (NextArg()!=STRINGARG) HError(3219,"HVite: Output label file extension expected"); labExt = GetStrArg(); break; case 'z': if (NextArg()!=STRINGARG) HError(3219,"HVite: Lattice output file extension expected"); latExt = GetStrArg(); break; case 'F': if (NextArg() != STRINGARG) HError(3219,"HVite: Data File format expected"); if((dfmt = Str2Format(GetStrArg())) == ALIEN) HError(-3289,"HVite: Warning ALIEN Input file format 
set"); break; case 'G': if (NextArg() != STRINGARG) HError(3219,"HVite: Source Label File format expected"); if((ifmt = Str2Format(GetStrArg())) == ALIEN) HError(-3289,"HVite: Warning ALIEN Input file format set"); break; case 'H': if (NextArg() != STRINGARG) HError(3219,"HVite: MMF File name expected"); AddMMF(&hset,GetStrArg()); break; case 'I': if (NextArg() != STRINGARG) HError(3219,"HVite: MLF file name expected"); LoadMasterFile(GetStrArg()); break; case 'L': if (NextArg()!=STRINGARG) HError(3219,"HVite: Label/network file directory expected"); labInDir = GetStrArg(); break; case 'P': if (NextArg() != STRINGARG) HError(3219,"HVite: Target Label File format expected"); if((ofmt = Str2Format(GetStrArg())) == ALIEN) HError(-3289,"HVite: Warning ALIEN Label output file format set"); break; case 'B': saveBinary = TRUE; break; case 'T': trace = GetChkedInt(0,511,s); break; case 'X': if (NextArg()!=STRINGARG) HError(3219,"HVite: Input label/network file extension expected"); labInExt = GetStrArg(); break; case 'h': if (NextArg()!=STRINGARG) HError(1,"Speaker name pattern expected"); xfInfo.outSpkrPat = GetStrArg(); if (NextArg()==STRINGARG) { xfInfo.inSpkrPat = GetStrArg(); if (NextArg()==STRINGARG) xfInfo.paSpkrPat = GetStrArg(); } if (NextArg() != SWITCHARG) HError(2319,"HERest: cannot have -h as the last option"); break; case 'E': if (NextArg()!=STRINGARG) HError(2319,"HERest: parent transform directory expected"); xfInfo.usePaXForm = TRUE; xfInfo.paXFormDir = GetStrArg(); if (NextArg()==STRINGARG) xfInfo.paXFormExt = GetStrArg(); if (NextArg() != SWITCHARG) HError(2319,"HVite: cannot have -E as the last option"); break; case 'J': if (NextArg()!=STRINGARG) HError(2319,"HERest: input transform directory expected"); AddInXFormDir(&hset,GetStrArg()); if (NextArg()==STRINGARG) xfInfo.inXFormExt = GetStrArg(); if (NextArg() != SWITCHARG) HError(2319,"HVite: cannot have -J as the last option"); break; case 'K': if (NextArg()!=STRINGARG) HError(2319,"HVite: output 
transform directory expected"); xfInfo.outXFormDir = GetStrArg(); xfInfo.useOutXForm = TRUE; if (NextArg()==STRINGARG) xfInfo.outXFormExt = GetStrArg(); if (NextArg() != SWITCHARG) HError(2319,"HVite: cannot have -K as the last option"); break; default: HError(3219,"HVite: Unknown switch %s",s); } } if (NextArg()!=STRINGARG) HError(3219,"HVite: Dictionary file name expected"); dictFn = GetStrArg(); if (NextArg()!=STRINGARG) HError(3219,"HVite: HMM list file name expected"); hmmListFn = GetStrArg(); #ifndef PHNALG if ((states || models) && nToks>1) HError(3230,"HVite: Alignment using multiple tokens is not supported"); #endif if (NumArgs()==0 && wdNetFn==NULL) HError(3230,"HVite: Network must be specified for recognition from audio"); if (loadNetworks && loadLabels) HError(3230,"HVite: Must choose either alignment from network or labels"); if (nToks>1 && latExt==NULL && nTrans==1) HError(-3230,"HVite: Performing nbest recognition with no nbest output"); if (nToks > 1 && latExt != NULL && nTrans > 1) HError(-3230,"HVite: Performing nbest recognition with 1-best and latttices output"); if ((update>0) && (!xfInfo.useOutXForm)) HError(3230,"HVite: Must use -K option with incremental adaptation"); Initialise(); /* Process the data */ if (wdNetFn==NULL) DoAlignment(); else DoRecognition(); /* Free up and we are done */ if (trace & T_MEM) { printf("Memory State on Completion\n"); PrintAllHeapStats(); } DeleteVRecInfo(vri); ResetHeap(&netHeap); FreePSetInfo(psi); UpdateSpkrStats(&hset,&xfInfo, NULL); ResetHeap(®Heap); ResetHeap(&modelHeap); Exit(0); return (0); /* never reached -- make compiler happy */ }
int main(int argc, char *argv[]) { Source src; int tmpInt; float tmpFlt; char *accfn, *s; void Initialise(char *hmmListFn); void Interpolate(void); void UpdateModels(void); void MakeWtAccLists(void); void AttachWtAccLists(void); void StatReport(void); if(InitShell(argc,argv,hsmooth_version,hsmooth_vc_id)<SUCCESS) HError(2400,"HSmooth: InitShell failed"); InitMem(); InitLabel(); InitMath(); InitSigP(); InitWave(); InitAudio(); InitVQ(); InitModel(); if(InitParm()<SUCCESS) HError(2400,"HSmooth: InitParm failed"); InitTrain(); InitUtil(); if (!InfoPrinted() && NumArgs() == 0) ReportUsage(); if (NumArgs() == 0) Exit(0); SetConfParms(); CreateHeap(&hmmStack,"HmmStore", MSTAK, 1, 1.0, 50000, 500000); CreateHMMSet(&hset,&hmmStack,TRUE); while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(2419,"HSmooth: Bad switch %s; must be single letter",s); switch(s[0]){ case 'b': epsilon = GetChkedFlt(0.0,1.0,s); break; case 'c': maxStep = GetChkedInt(1,1000,s); break; case 'd': if (NextArg()!=STRINGARG) HError(2419,"HSmooth: HMM definition directory expected"); hmmDir = GetStrArg(); break; case 'e': if (NextArg()!=STRINGARG) HError(2419,"HSmooth: HMM definition directory expected"); newDir = GetStrArg(); break; case 'm': minEgs = GetChkedInt(1,1000,s); break; case 'o': if (NextArg()!=STRINGARG) HError(2419,"HSmooth: HMM file extension expected"); newExt = GetStrArg(); break; case 's': stats = TRUE; if (NextArg()!=STRINGARG) HError(2419,"HSmooth: Stats file name expected"); statFN = GetStrArg(); break; case 'u': SetuFlags(); break; case 'v': minVar = GetChkedFlt(0.0,10.0,s); break; case 'w': mixWeightFloor = MINMIX * GetChkedFlt(0.0,10000.0,s); break; case 'x': if (NextArg()!=STRINGARG) HError(2419,"HSmooth: HMM file extension expected"); hmmExt = GetStrArg(); break; case 'B': saveBinary=TRUE; break; case 'H': if (NextArg() != STRINGARG) HError(2419,"HSmooth: HMM macro file name expected"); AddMMF(&hset,GetStrArg()); break; case 'M': if (NextArg()!=STRINGARG) 
HError(2419,"HSmooth: Output macro file directory expected"); newDir = GetStrArg(); break; case 'T': trace = GetChkedInt(0,0100000,s); break; default: HError(2419,"HSmooth: Unknown switch %s",s); } } if (NextArg() != STRINGARG) HError(2419,"HSmooth: file name of HMM list expected"); Initialise(GetStrArg()); do { if (NextArg()!=STRINGARG) HError(2419,"HSmooth: accumulator file name expected"); accfn = GetStrArg(); src=LoadAccs(&hset,accfn,uFlags); ReadFloat(&src,&tmpFlt,1,ldBinary); totalPr += (LogDouble)tmpFlt; ReadInt(&src,&tmpInt,1,ldBinary); totalT += tmpInt; CloseSource(&src); nBlk++; MakeWtAccLists(); } while (NumArgs()>0); AttachWtAccLists(); Interpolate(); if (stats) StatReport(); UpdateModels(); ResetUtil(); ResetTrain(); ResetParm(); ResetModel(); ResetVQ(); ResetAudio(); ResetWave(); ResetSigP(); ResetMath(); ResetLabel(); ResetMem(); ResetShell(); Exit(0); return (0); /* never reached -- make compiler happy */ }