/// this never shrinks our previously allocated capacity void init_empty_hash(I capacityPowerOf2) { if (capacityPowerOf2 < 4) capacityPowerOf2 = 4; assert(is_power_of_2(capacityPowerOf2)); I const newmask = capacityPowerOf2 - 1; if (!index_ || mask_ < newmask) { freehash(); mask_ = newmask; index_ = (Indices)std::malloc(sizeof(I) * capacityPowerOf2); setGrowAt(capacityPowerOf2); } clear_hash(); }
int main (int argc, char **argv) { heapelement_t **CDheap=NULL; hashelement_t **CDhash=NULL; phnhashelement_t **CIhash=NULL; dicthashelement_t **dicthash=NULL; int32 cilistsize=0, cdheapsize=0, threshold, tph_list_given, ncd; char *phnlist, *incimdef, *triphnlist, *incdmdef; char *lsnfile, *dictfn, *fillerdictfn, **CIlist=NULL; char *cimdeffn, *alltphnmdeffn, *untiedmdeffn, *countfn; parse_cmd_ln(argc,argv); /* Test all flags before beginning */ cimdeffn = (char *)cmd_ln_access("-ocimdef"); alltphnmdeffn = (char *)cmd_ln_access("-oalltphnmdef"); untiedmdeffn = (char *)cmd_ln_access("-ountiedmdef"); countfn = (char *)cmd_ln_access("-ocountfn"); if (cimdeffn) E_INFO("Will write CI mdef file %s\n",cimdeffn); if (alltphnmdeffn) E_INFO("Will write alltriphone mdef file %s\n",alltphnmdeffn); if (untiedmdeffn) E_INFO("Will write untied mdef file %s\n",untiedmdeffn); if (countfn) E_INFO("Will write triphone counts file %s\n",countfn); if (!cimdeffn && !alltphnmdeffn && !untiedmdeffn && !countfn) E_FATAL("No output mdef files or count files specified!\n"); dictfn = (char *)cmd_ln_access("-dictfn"); fillerdictfn = (char *)cmd_ln_access("-fdictfn"); lsnfile = (char*)cmd_ln_access("-lsnfn"); if ((untiedmdeffn || countfn) && (!lsnfile || !dictfn)) { E_WARN("Either dictionary or transcript file not given!\n"); if (untiedmdeffn) E_WARN("Untied mdef will not be made\n"); if (countfn) E_WARN("Phone counts will not be generated\n"); untiedmdeffn = countfn = NULL; } phnlist = (char *)cmd_ln_access("-phnlstfn"); triphnlist = (char *)cmd_ln_access("-triphnlstfn"); incimdef = (char *)cmd_ln_access("-inCImdef"); incdmdef = (char *)cmd_ln_access("-inCDmdef"); if (!incdmdef && !incimdef && !phnlist && !triphnlist) E_FATAL("No input mdefs or phone list given\n"); if (triphnlist) { if (phnlist) E_WARN("Both -triphnlist %s and -phnlist given.\n",triphnlist); E_WARN("Ignoring -phnlist %s\n",phnlist); phnlist = triphnlist; } tph_list_given = (triphnlist || incdmdef) ? 
1 : 0; if (incdmdef) { if (incimdef || phnlist){ E_WARN("Using only input CD mdef %s!\n",incdmdef); E_WARN("Using only triphones from input CD mdef %s!\n",incdmdef); if (incimdef) E_WARN("CImdef %s will be ignored\n",incimdef); if (phnlist) E_WARN("phonelist %s will be ignored\n",phnlist); incimdef = phnlist = NULL; } make_ci_list_cd_hash_frm_mdef(incdmdef,&CIlist,&cilistsize, &CDhash,&ncd); } else{ if (phnlist) make_ci_list_cd_hash_frm_phnlist(phnlist,&CIlist, &cilistsize,&CDhash,&ncd); if (incimdef) { if (CIlist) ckd_free_2d((void**)CIlist); make_ci_list_frm_mdef(incimdef,&CIlist,&cilistsize); } } if (cimdeffn) make_mdef_from_list(cimdeffn,CIlist,cilistsize,NULL,0,argv[0]); if (!tph_list_given && !cimdeffn) { read_dict(dictfn, fillerdictfn, &dicthash); if (CDhash) freehash(CDhash); make_dict_triphone_list (dicthash, &CDhash); } if (alltphnmdeffn){ threshold = -1; make_CD_heap(CDhash,threshold,&CDheap,&cdheapsize); make_mdef_from_list(alltphnmdeffn,CIlist,cilistsize, CDheap,cdheapsize,argv[0]); } if (countfn || untiedmdeffn) count_triphones(lsnfile, dicthash, CDhash, &CIhash); if (countfn){ print_counts(countfn,CIhash,CDhash); } if (untiedmdeffn){ threshold = find_threshold(CDhash); make_CD_heap(CDhash,threshold,&CDheap,&cdheapsize); make_mdef_from_list(untiedmdeffn,CIlist,cilistsize, CDheap,cdheapsize,argv[0]); } return 0; }
/// Destructor: delegates all cleanup to freehash().
~indexed()
{
  freehash();
}
void main(int argc, char **argv) { int i, j, a, mi, oi, tot, keywords; double totp, p, xp[MAXTAB]; Hash *hmsg; Word w; Stringtab *s, *t; Biobuf bout; mbest = 15; keywords = 0; ARGBEGIN{ case 'D': debug = 1; break; case 'k': keywords = 1; break; case 'm': mbest = atoi(EARGF(usage())); if(mbest > MAXBEST) sysfatal("cannot keep more than %d words", MAXBEST); break; default: usage(); }ARGEND for(i=0; i<argc; i++) if(strcmp(argv[i], "~") == 0) break; if(i > MAXTAB) sysfatal("cannot handle more than %d tables", MAXTAB); if(i+1 >= argc) usage(); for(i=0; i<argc; i++){ if(strcmp(argv[i], "~") == 0) break; tab[ntab].file = argv[i]; tab[ntab].hash = hread(argv[i]); s = findstab(tab[ntab].hash, "*nmsg*", 6, 1); if(s == nil || s->count == 0) tab[ntab].nmsg = 1; else tab[ntab].nmsg = s->count; ntab++; } Binit(&bout, 1, OWRITE); oi = ++i; for(a=i; a<argc; a++){ hmsg = hread(argv[a]); nbest = 0; for(s=hmsg->all; s; s=s->link){ w.s = s; tot = 0; totp = 0.0; for(i=0; i<ntab; i++){ t = findstab(tab[i].hash, s->str, s->n, 0); if(t == nil) w.count[i] = 0; else w.count[i] = t->count; tot += w.count[i]; p = w.count[i]/(double)tab[i].nmsg; if(p >= 1.0) p = 1.0; w.p[i] = p; totp += p; } if(tot < 5){ /* word does not appear enough; give to box 0 */ w.p[0] = 0.5; for(i=1; i<ntab; i++) w.p[i] = 0.1; w.mp = 0.5; w.mi = 0; noteword(&w); continue; } w.mp = 0.0; for(i=0; i<ntab; i++){ p = w.p[i]; p /= totp; if(p < 0.01) p = 0.01; else if(p > 0.99) p = 0.99; if(p > w.mp){ w.mp = p; w.mi = i; } w.p[i] = p; } noteword(&w); } totp = 0.0; for(i=0; i<ntab; i++){ p = 1.0; for(j=0; j<nbest; j++) p *= best[j].p[i]; xp[i] = p; totp += p; } for(i=0; i<ntab; i++) xp[i] /= totp; mi = 0; for(i=1; i<ntab; i++) if(xp[i] > xp[mi]) mi = i; if(oi != argc-1) Bprint(&bout, "%s: ", argv[a]); Bprint(&bout, "%s %f", tab[mi].file, xp[mi]); if(keywords){ for(i=0; i<nbest; i++){ Bprint(&bout, " "); Bwrite(&bout, best[i].s->str, best[i].s->n); Bprint(&bout, " %f", best[i].p[mi]); } } freehash(hmsg); Bprint(&bout, "\n"); 
if(debug){ for(i=0; i<nbest; i++){ Bwrite(&bout, best[i].s->str, best[i].s->n); Bprint(&bout, " %f", best[i].p[mi]); if(best[i].p[mi] < best[i].mp) Bprint(&bout, " (%f %s)", best[i].mp, tab[best[i].mi].file); Bprint(&bout, "\n"); } } } Bterm(&bout); }