/**
 * Create a state-alignment search over an existing alignment.
 *
 * The search is registered through ps_search_init() using the dictionary and
 * dict2pid reachable from the alignment (al->d2p).  One HMM is then set up
 * for every entry yielded by the alignment's phone iterator.
 *
 * @param name   Name passed on to ps_search_init().
 * @param config Configuration passed on to ps_search_init().
 * @param acmod  Acoustic model; its mdef and tmat parameterize the HMM context.
 * @param al     Alignment to search; the pointer is stored in the search
 *               object, it is not copied.
 * @return The new search viewed as a ps_search_t, or NULL if the HMM context
 *         could not be created.
 */
ps_search_t *
state_align_search_init(const char *name,
                        cmd_ln_t *config,
                        acmod_t *acmod,
                        ps_alignment_t *al)
{
    state_align_search_t *search = ckd_calloc(1, sizeof(*search));
    ps_alignment_iter_t *phone_it;
    hmm_t *slot;

    ps_search_init(ps_search_base(search), &state_align_search_funcs,
                   PS_SEARCH_TYPE_STATE_ALIGN, name,
                   config, acmod, al->d2p->dict, al->d2p);

    search->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
                                      acmod->tmat->tp, NULL, acmod->mdef->sseq);
    if (search->hmmctx == NULL) {
        ckd_free(search);
        return NULL;
    }

    search->al = al;

    /* Size the HMM vector from the phone level of the alignment. */
    search->n_phones = ps_alignment_n_phones(al);
    search->n_emit_state = ps_alignment_n_states(al);
    search->hmms = ckd_calloc(search->n_phones, sizeof(*search->hmms));

    /* Walk the phone entries and the HMM slots in lock step. */
    slot = search->hmms;
    phone_it = ps_alignment_phones(al);
    while (phone_it) {
        ps_alignment_entry_t *phone = ps_alignment_iter_get(phone_it);

        hmm_init(search->hmmctx, slot, FALSE,
                 phone->id.pid.ssid, phone->id.pid.tmatid);
        ++slot;
        phone_it = ps_alignment_iter_next(phone_it);
    }

    return ps_search_base(search);
}
/**
 * (Re)build the phone loop search state for a new dictionary / dict2pid.
 *
 * Hands dict and d2p to ps_search_base_reinit(), then recreates the HMM
 * context, the penalty buffers and one HMM per CI phone of the acoustic
 * model, and re-reads the -pl_window, -pl_weight, -pl_beam, -pl_pbeam and
 * -pl_pip settings from the search configuration.
 *
 * @param search Phone loop search (a phone_loop_search_t) to reinitialize.
 * @param dict   Dictionary passed to ps_search_base_reinit().
 * @param d2p    dict2pid passed to ps_search_base_reinit().
 * @return 0 on success, -1 if the HMM context could not be created; in that
 *         case the previous HMM array has already been released.
 */
static int
phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
{
    phone_loop_search_t *pls = (phone_loop_search_t *)search;
    cmd_ln_t *config = ps_search_config(search);
    acmod_t *acmod = ps_search_acmod(search);
    int i;

    /* Free old dict2pid, dict, if necessary. */
    ps_search_base_reinit(search, dict, d2p);

    /*
     * Release the previous phone HMMs first.  They were created with the
     * old context and the old phone count, so this has to happen before
     * the context is freed and before n_phones is overwritten below.
     */
    if (pls->hmms) {
        for (i = 0; i < pls->n_phones; ++i)
            hmm_deinit((hmm_t *)&pls->hmms[i]);
        ckd_free(pls->hmms);
        pls->hmms = NULL;
        pls->n_phones = 0;
    }

    /* Initialize HMM context. */
    if (pls->hmmctx)
        hmm_context_free(pls->hmmctx);
    pls->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
                                   acmod->tmat->tp, NULL, acmod->mdef->sseq);
    if (pls->hmmctx == NULL)
        return -1;

    /* Initialize penalty storage */
    pls->n_phones = bin_mdef_n_ciphone(acmod->mdef);
    pls->window = cmd_ln_int32_r(config, "-pl_window");
    if (pls->penalties)
        ckd_free(pls->penalties);
    pls->penalties = (int32 *)ckd_calloc(pls->n_phones, sizeof(*pls->penalties));
    if (pls->pen_buf)
        ckd_free_2d(pls->pen_buf);
    pls->pen_buf = (int32 **)ckd_calloc_2d(pls->window, pls->n_phones, sizeof(**pls->pen_buf));

    /* Initialize phone HMMs: one per CI phone. */
    pls->hmms = (hmm_t *)ckd_calloc(pls->n_phones, sizeof(*pls->hmms));
    for (i = 0; i < pls->n_phones; ++i) {
        hmm_init(pls->hmmctx, (hmm_t *)&pls->hmms[i],
                 FALSE,
                 bin_mdef_pid2ssid(acmod->mdef, i),
                 bin_mdef_pid2tmatid(acmod->mdef, i));
    }

    /* Beams and penalty are converted to the log domain and scaled down
     * by SENSCR_SHIFT. */
    pls->penalty_weight = cmd_ln_float64_r(config, "-pl_weight");
    pls->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_beam")) >> SENSCR_SHIFT;
    pls->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_pbeam")) >> SENSCR_SHIFT;
    pls->pip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pl_pip")) >> SENSCR_SHIFT;
    E_INFO("State beam %d Phone exit beam %d Insertion penalty %d\n",
           pls->beam, pls->pbeam, pls->pip);

    return 0;
}
/*
 * Set up the 7-state HMM for the "unrelated" mode.
 *
 * Allocates args->tprob as an nstates x nstates matrix (accessed through
 * MAT), fills it from args->pij and hands it to hmm_init(); the resulting
 * model is stored in args->hmm.  The matrix stays owned by args.
 */
static void init_hmm_unrelated(args_t *args)
{
    /* Cells that receive pij*pij instead of pij, as (MAT index 1, MAT index 2) */
    const int squared[][2] = {
        { UNRL_0101, UNRL_xxxx },
        { UNRL_0110, UNRL_xxxx },
        { UNRL_x0x0, UNRL_0x0x },
        { UNRL_0110, UNRL_0x0x },
        { UNRL_x00x, UNRL_0xx0 },
        { UNRL_0101, UNRL_0xx0 },
        { UNRL_0101, UNRL_x00x },
        { UNRL_0110, UNRL_x0x0 },
        { UNRL_0110, UNRL_0101 },
    };
    const int nsquared = (int)(sizeof(squared) / sizeof(squared[0]));
    int s, t, k;

    args->nstates = 7;
    args->tprob = (double*) malloc(sizeof(double)*args->nstates*args->nstates);

    /* Every cell starts out as pij ... */
    for (s = 0; s < args->nstates; s++)
        for (t = 0; t < args->nstates; t++)
            MAT(args->tprob,args->nstates,s,t) = args->pij;

    /* ... except the listed pairs, which get pij squared */
    for (k = 0; k < nsquared; k++)
        MAT(args->tprob,args->nstates,squared[k][0],squared[k][1]) = args->pij*args->pij;

    /* Make the matrix symmetric: cell (s,t) with t>s is copied from (t,s) */
    for (s = 0; s < args->nstates; s++)
        for (t = s + 1; t < args->nstates; t++)
            MAT(args->tprob,args->nstates,s,t) = MAT(args->tprob,args->nstates,t,s);

    /* Diagonal = 1 minus the sum of the other cells sharing the first index */
    for (s = 0; s < args->nstates; s++)
    {
        double off_diag = 0;
        for (t = 0; t < args->nstates; t++)
        {
            if ( s==t ) continue;
            off_diag += MAT(args->tprob,args->nstates,s,t);
        }
        MAT(args->tprob,args->nstates,s,s) = 1 - off_diag;
    }

#if 0
    for (s = 0; s < args->nstates; s++)
    {
        for (t = 0; t < args->nstates; t++)
            fprintf(stderr,"\t%e",MAT(args->tprob,args->nstates,t,s));
        fprintf(stderr,"\n");
    }
#endif

    args->hmm = hmm_init(args->nstates, args->tprob, 10000);
}
/**
 * Allocate root HMMs for all single-phone dictionary words.
 *
 * The words are counted first so that ngs->single_phone_wid and
 * ngs->rhmm_1ph can be sized exactly; a second pass fills one entry per
 * single-phone word and points ngs->word_chan[] for that word at it.
 */
static void
ngram_fwdflat_allocate_1ph(ngram_search_t *ngs)
{
    dict_t *dict = ps_search_dict(ngs);
    int n_words = ps_search_n_words(ngs);
    int wid, slot;

    /* Single-phone words are allocated here since they won't have
     * been allocated for us by fwdtree initialization. */
    ngs->n_1ph_words = 0;
    for (wid = 0; wid < n_words; wid++) {
        if (!dict_is_single_phone(dict, wid))
            continue;
        ++ngs->n_1ph_words;
    }

    ngs->single_phone_wid = ckd_calloc(ngs->n_1ph_words,
                                       sizeof(*ngs->single_phone_wid));
    ngs->rhmm_1ph = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->rhmm_1ph));

    slot = 0;
    for (wid = 0; wid < n_words; wid++) {
        if (dict_is_single_phone(dict, wid)) {
            /* DICT2PID location */
            ngs->rhmm_1ph[slot].ciphone = dict_first_phone(dict, wid);
            /* The second-phone field is set to the silence phone */
            ngs->rhmm_1ph[slot].ci2phone = bin_mdef_silphone(ps_search_acmod(ngs)->mdef);
            hmm_init(ngs->hmmctx, &ngs->rhmm_1ph[slot].hmm, TRUE,
                     /* ssid */ bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef,
                                                  ngs->rhmm_1ph[slot].ciphone),
                     /* tmatid */ bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef,
                                                      ngs->rhmm_1ph[slot].ciphone));
            ngs->rhmm_1ph[slot].next = NULL;

            /* Publish the root channel and remember which word owns it */
            ngs->word_chan[wid] = (chan_t *) &(ngs->rhmm_1ph[slot]);
            ngs->single_phone_wid[slot] = wid;
            slot++;
        }
    }
}
/*
 * Add the word emitted by the given transition (fsglink) to the given lextree
 * (rooted at root), and return the new lextree root.  (There may actually be
 * several root nodes, maintained in a linked list via fsg_pnode_t.sibling.
 * "root" is the head of this list.)
 * lclist, rclist: sets of left and right context phones for this link; both
 * are read up to the first negative entry and must hold at least one phone.
 * alloc_head: head of a linear list of all allocated pnodes for the parent
 * FSG state, kept elsewhere and updated by this routine.
 *
 * NOTE(review): only the opening part of this definition is present here
 * (it stops after the left-context loop of the single-phone case), so the
 * comments below describe the visible statements only.
 */
static fsg_pnode_t *
psubtree_add_trans(fsg_lextree_t *lextree,
                   fsg_pnode_t * root,
                   fsg_glist_linklist_t **curglist,
                   fsg_link_t * fsglink,
                   int16 *lclist, int16 *rclist,
                   fsg_pnode_t ** alloc_head)
{
    int32 silcipid;             /* Silence CI phone ID */
    int32 pronlen;              /* Pronunciation length */
    int32 wid;                  /* FSG (not dictionary!!) word ID */
    int32 dictwid;              /* Dictionary (not FSG!!) word ID */
    int32 ssid;                 /* Senone Sequence ID */
    int32 tmatid;               /* Transition matrix ID handed to hmm_init() */
    gnode_t *gn;
    fsg_pnode_t *pnode, *pred, *head;
    int32 n_ci, p, lc, rc;
    glist_t lc_pnodelist;       /* Temp pnodes list for different left contexts */
    glist_t rc_pnodelist;       /* Temp pnodes list for different right contexts */
    int32 i, j;
    /* Allocation counters; only n_lc_alloc is updated in the visible part */
    int n_lc_alloc = 0, n_int_alloc = 0, n_rc_alloc = 0;

    silcipid = bin_mdef_silphone(lextree->mdef);
    n_ci = bin_mdef_n_ciphone(lextree->mdef);

    /* Map the FSG word of this link to its dictionary entry */
    wid = fsg_link_wid(fsglink);
    assert(wid >= 0);           /* Cannot be a null transition */
    dictwid = dict_wordid(lextree->dict,
                          fsg_model_word_str(lextree->fsg, wid));
    pronlen = dict_pronlen(lextree->dict, dictwid);
    assert(pronlen >= 1);

    assert(lclist[0] >= 0);     /* At least one phonetic context provided */
    assert(rclist[0] >= 0);

    /* New pnodes are pushed onto the front of the caller's allocation list */
    head = *alloc_head;
    pred = NULL;

    if (pronlen == 1) {         /* Single-phone word */
        int ci = dict_first_phone(lextree->dict, dictwid);
        /* Only non-filler words are mpx */
        /* NOTE(review): this branch is taken when dict_filler_word() is true,
         * yet it builds one node per left context; confirm the condition
         * matches the intent expressed by the surrounding comments. */
        if (dict_filler_word(lextree->dict, dictwid)) {
            /*
             * Left diphone ID for single-phone words already assumes SIL is right
             * context; only left contexts need to be handled.
             */
            lc_pnodelist = NULL;

            for (i = 0; lclist[i] >= 0; i++) {
                lc = lclist[i];
                ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid);
                tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_first_phone(lextree->dict, dictwid));
                /* Check if this ssid already allocated for some other context */
                for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
                    pnode = (fsg_pnode_t *) gnode_ptr(gn);

                    if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) {
                        /* already allocated; share it for this context phone */
                        fsg_pnode_add_ctxt(pnode, lc);
                        break;
                    }
                }

                /* gn is NULL only when the scan above ran off the end of the list */
                if (!gn) {      /* ssid not already allocated */
                    pnode =
                        (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
                    pnode->ctx = lextree->ctx;
                    pnode->next.fsglink = fsglink;
                    /* Link probability is shifted down by SENSCR_SHIFT before
                     * the word and phone insertion penalties are added */
                    pnode->logs2prob =
                        (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT)
                        + lextree->wip + lextree->pip;
                    pnode->ci_ext = dict_first_phone(lextree->dict, dictwid);
                    pnode->ppos = 0;
                    pnode->leaf = TRUE;
                    pnode->sibling = root;      /* All root nodes linked together */
                    fsg_pnode_add_ctxt(pnode, lc);      /* Initially zeroed by calloc above */
                    pnode->alloc_next = head;
                    head = pnode;
                    root = pnode;
                    ++n_lc_alloc;

                    hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);

                    /* Remember the node so later contexts with the same ssid share it */
                    lc_pnodelist =
                        glist_add_ptr(lc_pnodelist, (void *) pnode);
                }
            }
/*
 * Segment a feature sequence using an HMM followed by histogram clustering.
 *
 * The features are first scaled in place by 10, then an HMM with
 * nHMM_states states is created and trained on them and its decoded state
 * sequence is written to q.  Histograms of those states are built and
 * clustered with cluster_melt(), after which q holds a sequence of cluster
 * assignments.
 *
 * q               per-frame output; q is the buffer handed to viterbi_decode()
 *                 and cluster_melt()
 * features        frames_read rows of feature_length values; MODIFIED (scaled)
 * histogram_length, nclusters, neighbour_limit
 *                 passed through to create_histograms() / cluster_melt()
 *
 * An earlier experiment that used the predominant feature bin as the decoded
 * state was permanently disabled and is not part of this routine.
 */
void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
                     int histogram_length, int nclusters, int neighbour_limit)
{
    const double feature_scale = 10;    /* 'balances covariances' during HMM training */
    const int nbsched = 20;             /* length of inverse temperature schedule */
    const double b0 = 100;              /* first schedule entry */
    const double alpha = 0.7;           /* ratio between consecutive schedule entries */
    int frame, bin, step;

    /* scale all the features before HMM training */
    for (frame = 0; frame < frames_read; frame++)
    {
        double* row = features[frame];
        for (bin = 0; bin < feature_length; bin++)
            row[bin] *= feature_scale;
    }

    /* create, train and decode the HMM; the model is only needed until q is filled */
    model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
    hmm_train(features, frames_read, model);
    viterbi_decode(features, frames_read, model, q);
    hmm_close(model);

    /* create histograms of states (vector in row major order) */
    double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double));
    create_histograms(q, frames_read, nHMM_states, histogram_length, h);

    /* geometric inverse temperature schedule: b0, alpha*b0, alpha^2*b0, ... */
    double* bsched = (double*) malloc(nbsched*sizeof(double));
    bsched[0] = b0;
    for (step = 1; step < nbsched; step++)
        bsched[step] = alpha * bsched[step - 1];

    /* cluster the histograms; now q holds a sequence of cluster assignments */
    cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);

    free(h);
    free(bsched);
}
/** * Build net from phone HMMs */ static int phmm_build(allphone_search_t * allphs) { phmm_t *p, **pid2phmm; bin_mdef_t *mdef; int32 lrc_size; uint32 *lc, *rc; s3pid_t pid; s3cipid_t ci; s3cipid_t *filler; int n_phmm, n_link; int i, nphone; mdef = ((ps_search_t *) allphs)->acmod->mdef; allphs->ci_phmm = (phmm_t **) ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(phmm_t *)); pid2phmm = (phmm_t **) ckd_calloc(bin_mdef_n_phone(mdef), sizeof(phmm_t *)); /* For each unique ciphone/triphone entry in mdef, create a PHMM node */ n_phmm = 0; nphone = allphs->ci_only ? bin_mdef_n_ciphone(mdef) : bin_mdef_n_phone(mdef); E_INFO("Building PHMM net of %d phones\n", nphone); for (pid = 0; pid < nphone; pid++) { if ((p = phmm_lookup(allphs, pid)) == NULL) { //not found, should be created p = (phmm_t *) ckd_calloc(1, sizeof(*p)); hmm_init(allphs->hmmctx, &(p->hmm), FALSE, mdef_pid2ssid(mdef, pid), mdef->phone[pid].tmat); p->pid = pid; p->ci = bin_mdef_pid2ci(mdef, pid); p->succlist = NULL; p->next = allphs->ci_phmm[p->ci]; allphs->ci_phmm[p->ci] = p; n_phmm++; } pid2phmm[pid] = p; } /* Fill out bitvecs of each PHMM node, alloc continuous memory chunk for context bitvectors */ lrc_size = bitvec_size(bin_mdef_n_ciphone(mdef)); lc = ckd_calloc(n_phmm * 2 * lrc_size, sizeof(bitvec_t)); rc = lc + (n_phmm * lrc_size); for (ci = 0; ci < mdef->n_ciphone; ci++) { for (p = allphs->ci_phmm[ci]; p; p = p->next) { p->lc = lc; lc += lrc_size; p->rc = rc; rc += lrc_size; } } /* Fill out lc and rc bitmaps (remember to map all fillers to each other!!) 
*/ filler = (s3cipid_t *) ckd_calloc(bin_mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t)); /* Connect fillers */ i = 0; for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++) { p = pid2phmm[ci]; bitvec_set_all(p->lc, bin_mdef_n_ciphone(mdef)); bitvec_set_all(p->rc, bin_mdef_n_ciphone(mdef)); if (mdef->phone[ci].info.ci.filler) { filler[i++] = ci; } } filler[i] = BAD_S3CIPID; /* Loop over cdphones only if ci_only is not set */ for (pid = bin_mdef_n_ciphone(mdef); pid < nphone; pid++) { p = pid2phmm[pid]; if (mdef->phone[mdef->phone[pid].info.cd.ctx[1]].info.ci.filler) { for (i = 0; IS_S3CIPID(filler[i]); i++) bitvec_set(p->lc, filler[i]); } else bitvec_set(p->lc, mdef->phone[pid].info.cd.ctx[1]); if (mdef->phone[mdef->phone[pid].info.cd.ctx[2]].info.ci.filler) { for (i = 0; IS_S3CIPID(filler[i]); i++) bitvec_set(p->rc, filler[i]); } else bitvec_set(p->rc, mdef->phone[pid].info.cd.ctx[2]); } ckd_free(pid2phmm); ckd_free(filler); /* Create links between PHMM nodes */ n_link = phmm_link(allphs); E_INFO("%d nodes, %d links\n", n_phmm, n_link); return 0; }
/**
 * Build HMM network for one utterance of fwdflat search.
 *
 * For every multi-phone word in ngs->fwdflat_wordlist (terminated by a
 * negative entry) this allocates a root channel for the first phone, a chain
 * of channels for the word-internal phones, and appends the right-context
 * channels produced by ngram_search_alloc_all_rc().  The finished chain is
 * stored in ngs->word_chan[wid].
 */
static void
build_fwdflat_chan(ngram_search_t *ngs)
{
    dict_t *dict = ps_search_dict(ngs);
    dict2pid_t *d2p = ps_search_dict2pid(ngs);
    int32 idx;

    /* Build word HMMs for each word in the lattice. */
    for (idx = 0; ngs->fwdflat_wordlist[idx] >= 0; idx++) {
        int32 wid = ngs->fwdflat_wordlist[idx];
        int32 pos;
        root_chan_t *root;
        chan_t *inner;
        chan_t **tail;          /* where the next channel gets linked in */

        /* Single-phone words are permanently allocated */
        if (dict_is_single_phone(dict, wid))
            continue;

        assert(ngs->word_chan[wid] == NULL);

        /* Multiplex root HMM for first phone (one root per word, flat
         * lexicon).  diphone is irrelevant here, for the time being,
         * at least. */
        root = listelem_malloc(ngs->root_chan_alloc);
        root->ci2phone = dict_second_phone(dict, wid);
        root->ciphone = dict_first_phone(dict, wid);
        root->next = NULL;
        hmm_init(ngs->hmmctx, &root->hmm, TRUE,
                 bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, root->ciphone),
                 bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, root->ciphone));
        tail = &root->next;

        /* HMMs for word-internal phones (positions 1 .. pronlen-2) */
        for (pos = 1; pos < dict_pronlen(dict, wid) - 1; pos++) {
            inner = listelem_malloc(ngs->chan_alloc);
            inner->ciphone = dict_pron(dict, wid, pos);
            if (pos == dict_pronlen(dict, wid) - 1)
                inner->info.rc_id = 0;
            else
                inner->info.rc_id = -1;
            inner->next = NULL;
            hmm_init(ngs->hmmctx, &inner->hmm, FALSE,
                     dict2pid_internal(d2p,wid,pos),
                     bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, inner->ciphone));

            /* Append after the root or after the previous internal phone */
            *tail = inner;
            tail = &inner->next;
        }

        /* Right-context phones */
        ngram_search_alloc_all_rc(ngs, wid);

        /* Link in just allocated right-context phones */
        *tail = ngs->word_chan[wid];
        ngs->word_chan[wid] = (chan_t *) root;
    }
}
/* * Add the word emitted by the given transition (fsglink) to the given lextree * (rooted at root), and return the new lextree root. (There may actually be * several root nodes, maintained in a linked list via fsg_pnode_t.sibling. * "root" is the head of this list.) * lclist, rclist: sets of left and right context phones for this link. * alloc_head: head of a linear list of all allocated pnodes for the parent * FSG state, kept elsewhere and updated by this routine. */ static fsg_pnode_t * psubtree_add_trans(fsg_lextree_t *lextree, fsg_pnode_t * root, fsg_glist_linklist_t **curglist, fsg_link_t * fsglink, int16 *lclist, int16 *rclist, fsg_pnode_t ** alloc_head) { int32 silcipid; /* Silence CI phone ID */ int32 pronlen; /* Pronunciation length */ int32 wid; /* FSG (not dictionary!!) word ID */ int32 dictwid; /* Dictionary (not FSG!!) word ID */ int32 ssid; /* Senone Sequence ID */ gnode_t *gn; fsg_pnode_t *pnode, *pred, *head; int32 n_ci, p, lc, rc; glist_t lc_pnodelist; /* Temp pnodes list for different left contexts */ glist_t rc_pnodelist; /* Temp pnodes list for different right contexts */ int32 i, j; silcipid = bin_mdef_silphone(lextree->mdef); n_ci = bin_mdef_n_ciphone(lextree->mdef); wid = fsg_link_wid(fsglink); assert(wid >= 0); /* Cannot be a null transition */ dictwid = dict_wordid(lextree->dict, fsg_model_word_str(lextree->fsg, wid)); pronlen = dict_pronlen(lextree->dict, dictwid); assert(pronlen >= 1); assert(lclist[0] >= 0); /* At least one phonetic context provided */ assert(rclist[0] >= 0); head = *alloc_head; pred = NULL; if (pronlen == 1) { /* Single-phone word */ int ci = dict_first_phone(lextree->dict, dictwid); /* Only non-filler words are mpx */ if (dict_filler_word(lextree->dict, dictwid)) { /* * Left diphone ID for single-phone words already assumes SIL is right * context; only left contexts need to be handled. 
*/ lc_pnodelist = NULL; for (i = 0; lclist[i] >= 0; i++) { lc = lclist[i]; ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid); /* Check if this ssid already allocated for some other context */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pnode = (fsg_pnode_t *) gnode_ptr(gn); if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) { /* already allocated; share it for this context phone */ fsg_pnode_add_ctxt(pnode, lc); break; } } if (!gn) { /* ssid not already allocated */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->next.fsglink = fsglink; pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip; pnode->ci_ext = dict_first_phone(lextree->dict, dictwid); pnode->ppos = 0; pnode->leaf = TRUE; pnode->sibling = root; /* All root nodes linked together */ fsg_pnode_add_ctxt(pnode, lc); /* Initially zeroed by calloc above */ pnode->alloc_next = head; head = pnode; root = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); lc_pnodelist = glist_add_ptr(lc_pnodelist, (void *) pnode); } } glist_free(lc_pnodelist); } else { /* Filler word; no context modelled */ ssid = bin_mdef_pid2ssid(lextree->mdef, ci); /* probably the same... 
*/ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->next.fsglink = fsglink; pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip; pnode->ci_ext = silcipid; /* Presents SIL as context to neighbors */ pnode->ppos = 0; pnode->leaf = TRUE; pnode->sibling = root; fsg_pnode_add_all_ctxt(&(pnode->ctxt)); pnode->alloc_next = head; head = pnode; root = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); } } else { /* Multi-phone word */ fsg_pnode_t **ssid_pnode_map; /* Temp array of ssid->pnode mapping */ ssid_pnode_map = (fsg_pnode_t **) ckd_calloc(n_ci, sizeof(fsg_pnode_t *)); lc_pnodelist = NULL; rc_pnodelist = NULL; for (p = 0; p < pronlen; p++) { int ci = dict_pron(lextree->dict, dictwid, p); if (p == 0) { /* Root phone, handle required left contexts */ /* Find if we already have an lc_pnodelist for the first phone of this word */ fsg_glist_linklist_t *predglist=*curglist; fsg_glist_linklist_t *glist=*curglist; rc = dict_pron(lextree->dict, dictwid, 1); while (glist && glist->glist && glist->ci != ci && glist->rc != rc){ glist = glist->next; } if (glist && glist->ci == ci && glist->rc == rc && glist->glist) { /* We've found a valid glist. Hook to it and move to next phoneme */ lc_pnodelist = glist->glist; /* Set the predecessor node for the future tree first */ pred = (fsg_pnode_t *) gnode_ptr(lc_pnodelist); continue; } else { /* Two cases that can bring us here * a. glist == NULL, i.e. end of current list. Create new entry. * b. glist->glist == NULL, i.e. first entry into list. 
*/ if (!glist) { /* Case a; reduce it to case b by allocing glist */ glist = (fsg_glist_linklist_t*) ckd_calloc(1, sizeof(fsg_glist_linklist_t)); glist->next = predglist; *curglist = glist; } glist->ci = ci; glist->rc = rc; glist->lc = -1; lc_pnodelist = glist->glist = NULL; /* Gets created below */ } for (i = 0; lclist[i] >= 0; i++) { lc = lclist[i]; ssid = dict2pid_ldiph_lc(lextree->d2p, ci, rc, lc); /* Compression is not done by d2p, so we do it * here. This might be slow, but it might not * be... we'll see. */ pnode = ssid_pnode_map[0]; for (j = 0; j < n_ci && ssid_pnode_map[j] != NULL; ++j) { pnode = ssid_pnode_map[j]; if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) break; } assert(j < n_ci); if (!pnode) { /* Allocate pnode for this new ssid */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof (fsg_pnode_t)); pnode->ctx = lextree->ctx; /* This bit is tricky! For now we'll put the prob in the final link only */ /* pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip; */ pnode->logs2prob = lextree->wip + lextree->pip; pnode->ci_ext = dict_first_phone(lextree->dict, dictwid); pnode->ppos = 0; pnode->leaf = FALSE; pnode->sibling = root; /* All root nodes linked together */ pnode->alloc_next = head; head = pnode; root = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); lc_pnodelist = glist_add_ptr(lc_pnodelist, (void *) pnode); ssid_pnode_map[j] = pnode; } fsg_pnode_add_ctxt(pnode, lc); } /* Put the lc_pnodelist back into glist */ glist->glist = lc_pnodelist; /* The predecessor node for the future tree is the root */ pred = root; } else if (p != pronlen - 1) { /* Word internal phone */ fsg_pnode_t *pnodeyoungest; ssid = dict2pid_internal(lextree->d2p, dictwid, p); /* First check if we already have this ssid in our tree */ pnode = pred->next.succ; pnodeyoungest = pnode; /* The youngest sibling */ while (pnode && (hmm_nonmpx_ssid(&pnode->hmm) != ssid || pnode->leaf)) { pnode = pnode->sibling; } if (pnode && 
(hmm_nonmpx_ssid(&pnode->hmm) == ssid && !pnode->leaf)) { /* Found the ssid; go to next phoneme */ pred = pnode; continue; } /* pnode not found, allocate it */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->logs2prob = lextree->pip; pnode->ci_ext = dict_pron(lextree->dict, dictwid, p); pnode->ppos = p; pnode->leaf = FALSE; pnode->sibling = pnodeyoungest; /* May be NULL */ if (p == 1) { /* Predecessor = set of root nodes for left ctxts */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pred = (fsg_pnode_t *) gnode_ptr(gn); pred->next.succ = pnode; } } else { /* Predecessor = word internal node */ pred->next.succ = pnode; } pnode->alloc_next = head; head = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); pred = pnode; } else { /* Leaf phone, handle required right contexts */ /* Note, leaf phones are not part of the tree */ xwdssid_t *rssid; memset((void *) ssid_pnode_map, 0, n_ci * sizeof(fsg_pnode_t *)); lc = dict_pron(lextree->dict, dictwid, p-1); rssid = dict2pid_rssid(lextree->d2p, ci, lc); for (i = 0; rclist[i] >= 0; i++) { rc = rclist[i]; j = rssid->cimap[rc]; ssid = rssid->ssid[j]; pnode = ssid_pnode_map[j]; if (!pnode) { /* Allocate pnode for this new ssid */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof (fsg_pnode_t)); pnode->ctx = lextree->ctx; /* We are plugging the word prob here. Ugly */ /* pnode->logs2prob = lextree->pip; */ pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->pip; pnode->ci_ext = dict_pron(lextree->dict, dictwid, p); pnode->ppos = p; pnode->leaf = TRUE; pnode->sibling = rc_pnodelist ? 
(fsg_pnode_t *) gnode_ptr(rc_pnodelist) : NULL; pnode->next.fsglink = fsglink; pnode->alloc_next = head; head = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); rc_pnodelist = glist_add_ptr(rc_pnodelist, (void *) pnode); ssid_pnode_map[j] = pnode; } else { assert(hmm_nonmpx_ssid(&pnode->hmm) == ssid); } fsg_pnode_add_ctxt(pnode, rc); } if (p == 1) { /* Predecessor = set of root nodes for left ctxts */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pred = (fsg_pnode_t *) gnode_ptr(gn); if (!pred->next.succ) pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); else { /* Link to the end of the sibling chain */ fsg_pnode_t *succ = pred->next.succ; while (succ->sibling) succ = succ->sibling; succ->sibling = (fsg_pnode_t*) gnode_ptr(rc_pnodelist); /* Since all entries of lc_pnodelist point to the same array, sufficient to update it once */ break; } } } else { /* Predecessor = word internal node */ if (!pred->next.succ) pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); else { /* Link to the end of the sibling chain */ fsg_pnode_t *succ = pred->next.succ; while (succ->sibling) succ = succ->sibling; succ->sibling = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); } } } } ckd_free((void *) ssid_pnode_map); /* glist_free(lc_pnodelist); Nope; this gets freed outside */ glist_free(rc_pnodelist); } *alloc_head = head; return root; }
/*
 * Prepare a run: choose the query sample, subset the header to the samples
 * that are needed, validate the AF tag, fill the pl2p lookup table, create
 * the two-state (HW/AZ) HMM and print the output header to stdout.
 *
 * Sample selection:
 *  - without -s, the VCF must contain exactly one sample, which is used;
 *  - when args->estimate_AF names a file (anything but "-"), the header is
 *    subset to the samples listed there, with the query sample added in
 *    front if the list does not contain it;
 *  - when args->estimate_AF is not given, the header is subset to the query
 *    sample alone;
 *  - when args->estimate_AF is "-", the header is left as is.
 *
 * Problems are reported through error().
 */
static void init_data(args_t *args)
{
    args->prev_rid = args->skip_rid = -1;
    args->hdr = args->files->readers[0].header;

    // Reject sample-less input first: the default-sample branch below reads samples[0]
    if ( !bcf_hdr_nsamples(args->hdr) ) error("No samples in the VCF?\n");
    if ( !args->sample )
    {
        if ( bcf_hdr_nsamples(args->hdr)>1 ) error("Missing the option -s, --sample\n");
        args->sample = strdup(args->hdr->samples[0]);
    }

    // Set samples
    kstring_t str = {0,0,0};
    if ( args->estimate_AF && strcmp("-",args->estimate_AF) )
    {
        int i, n;
        char **smpls = hts_readlist(args->estimate_AF, 1, &n);
        if ( !smpls ) error("Could not read the list of samples: %s\n", args->estimate_AF);

        // Make sure the query sample is included
        for (i=0; i<n; i++)
            if ( !strcmp(args->sample,smpls[i]) ) break;

        // Add the query sample if not present (the search above ran to the end)
        if ( i==n ) kputs(args->sample, &str);

        for (i=0; i<n; i++)
        {
            if ( str.l ) kputc(',', &str);
            kputs(smpls[i], &str);
            free(smpls[i]);
        }
        free(smpls);
    }
    else if ( !args->estimate_AF )
        kputs(args->sample, &str);

    if ( str.l )
    {
        int ret = bcf_hdr_set_samples(args->hdr, str.s, 0);
        if ( ret<0 ) error("Error parsing the list of samples: %s\n", str.s);
        else if ( ret>0 ) error("The %d-th sample not found in the VCF\n", ret);
    }

    if ( args->af_tag )
        if ( !bcf_hdr_idinfo_exists(args->hdr,BCF_HL_INFO,bcf_hdr_id2int(args->hdr,BCF_DT_ID,args->af_tag)) )
            error("No such INFO tag in the VCF: %s\n", args->af_tag);

    // NOTE(review): ismpl is not checked here; when the header was not subset
    // (estimate_AF is "-") an unknown sample name may yield a negative index -- confirm
    args->nsmpl = bcf_hdr_nsamples(args->hdr);
    args->ismpl = bcf_hdr_id2int(args->hdr, BCF_DT_SAMPLE, args->sample);
    free(str.s);

    // Lookup table: pl2p[i] = 10^(-i/10)
    int i;
    for (i=0; i<256; i++) args->pl2p[i] = pow(10., -i/10.);

    // Init transition matrix and HMM
    double tprob[4];
    MAT(tprob,2,STATE_HW,STATE_HW) = 1 - args->t2AZ;
    MAT(tprob,2,STATE_HW,STATE_AZ) = args->t2HW;
    MAT(tprob,2,STATE_AZ,STATE_HW) = args->t2AZ;
    MAT(tprob,2,STATE_AZ,STATE_AZ) = 1 - args->t2HW;

    // With a genetic map or a recombination rate, a callback adjusts the
    // transition probabilities; otherwise the fixed matrix is used
    if ( args->genmap_fname )
    {
        args->hmm = hmm_init(2, tprob, 0);
        hmm_set_tprob_func(args->hmm, set_tprob_genmap, args);
    }
    else if ( args->rec_rate > 0 )
    {
        args->hmm = hmm_init(2, tprob, 0);
        hmm_set_tprob_func(args->hmm, set_tprob_recrate, args);
    }
    else
        args->hmm = hmm_init(2, tprob, 10000);

    // print header
    printf("# This file was produced by: bcftools roh(%s+htslib-%s)\n", bcftools_version(),hts_version());
    printf("# The command line was:\tbcftools %s", args->argv[0]);
    for (i=1; i<args->argc; i++)
        printf(" %s",args->argv[i]);
    printf("\n#\n");
    printf("# [1]Chromosome\t[2]Position\t[3]State (0:HW, 1:AZ)\t[4]Quality\n");
}
/*
 * Set up the 8-state HMM for the trio mode.
 *
 * Fills the file-level hap_switch table with SW_MOTHER / SW_FATHER flags for
 * each listed pair of states, derives args->tprob from it (one factor of
 * args->pij per flag, 0 where no flag is set, diagonal = 1 minus the sum of
 * the other cells sharing the first index) and creates args->hmm with
 * hmm_init().
 */
static void init_hmm_trio(args_t *args)
{
    /* { first index, second index, flags } for every non-zero hap_switch cell */
    const int switches[][3] = {
        { TRIO_AD, TRIO_AC, SW_FATHER },
        { TRIO_BC, TRIO_AC, SW_MOTHER },
        { TRIO_BD, TRIO_AC, SW_MOTHER | SW_FATHER },
        { TRIO_AC, TRIO_AD, SW_FATHER },
        { TRIO_BC, TRIO_AD, SW_MOTHER | SW_FATHER },
        { TRIO_BD, TRIO_AD, SW_MOTHER },
        { TRIO_AC, TRIO_BC, SW_MOTHER },
        { TRIO_AD, TRIO_BC, SW_MOTHER | SW_FATHER },
        { TRIO_BD, TRIO_BC, SW_FATHER },
        { TRIO_AC, TRIO_BD, SW_MOTHER | SW_FATHER },
        { TRIO_AD, TRIO_BD, SW_MOTHER },
        { TRIO_BC, TRIO_BD, SW_FATHER },

        { TRIO_DA, TRIO_CA, SW_FATHER },
        { TRIO_CB, TRIO_CA, SW_MOTHER },
        { TRIO_DB, TRIO_CA, SW_MOTHER | SW_FATHER },
        { TRIO_CA, TRIO_DA, SW_FATHER },
        { TRIO_CB, TRIO_DA, SW_MOTHER | SW_FATHER },
        { TRIO_DB, TRIO_DA, SW_MOTHER },
        { TRIO_CA, TRIO_CB, SW_MOTHER },
        { TRIO_DA, TRIO_CB, SW_MOTHER | SW_FATHER },
        { TRIO_DB, TRIO_CB, SW_FATHER },
        { TRIO_CA, TRIO_DB, SW_MOTHER | SW_FATHER },
        { TRIO_DA, TRIO_DB, SW_MOTHER },
        { TRIO_CB, TRIO_DB, SW_FATHER },
    };
    const int nswitches = (int)(sizeof(switches) / sizeof(switches[0]));
    int s, t, k;

    args->nstates = 8;
    args->tprob = (double*) malloc(sizeof(double)*args->nstates*args->nstates);

    /* Clear the table, then mark the listed pairs */
    for (s = 0; s < args->nstates; s++)
        for (t = 0; t < args->nstates; t++)
            hap_switch[s][t] = 0;
    for (k = 0; k < nswitches; k++)
        hap_switch[switches[k][0]][switches[k][1]] = switches[k][2];

    /* One factor of pij for each flag that is set; 0 where none is set */
    for (s = 0; s < args->nstates; s++)
    {
        for (t = 0; t < args->nstates; t++)
        {
            double prob = 0;
            if ( hap_switch[s][t] )
            {
                prob = 1;
                if ( hap_switch[s][t] & SW_MOTHER ) prob *= args->pij;
                if ( hap_switch[s][t] & SW_FATHER ) prob *= args->pij;
            }
            MAT(args->tprob,args->nstates,s,t) = prob;
        }
    }

    /* Diagonal = 1 minus the sum of the other cells sharing the first index */
    for (s = 0; s < args->nstates; s++)
    {
        double off_diag = 0;
        for (t = 0; t < args->nstates; t++)
        {
            if ( s==t ) continue;
            off_diag += MAT(args->tprob,args->nstates,s,t);
        }
        MAT(args->tprob,args->nstates,s,s) = 1 - off_diag;
    }

#if 0
    for (s = 0; s < args->nstates; s++)
    {
        for (t = 0; t < args->nstates; t++)
            fprintf(stderr,"\t%d",hap_switch[t][s]);
        fprintf(stderr,"\n");
    }
    for (s = 0; s < args->nstates; s++)
    {
        for (t = 0; t < args->nstates; t++)
            fprintf(stderr,"\t%e",MAT(args->tprob,args->nstates,t,s));
        fprintf(stderr,"\n");
    }
#endif

    args->hmm = hmm_init(args->nstates, args->tprob, 10000);
}