Пример #1
0
ps_search_t *
state_align_search_init(const char *name,
                        cmd_ln_t *config,
                        acmod_t *acmod,
                        ps_alignment_t *al)
{
    state_align_search_t *sas;
    ps_alignment_iter_t *itor;
    hmm_t *hmm;

    sas = ckd_calloc(1, sizeof(*sas));
    ps_search_init(ps_search_base(sas), &state_align_search_funcs,
		   PS_SEARCH_TYPE_STATE_ALIGN, name,
                   config, acmod, al->d2p->dict, al->d2p);
    sas->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
                                   acmod->tmat->tp, NULL, acmod->mdef->sseq);
    if (sas->hmmctx == NULL) {
        ckd_free(sas);
        return NULL;
    }
    sas->al = al;

    /* Generate HMM vector from phone level of alignment. */
    sas->n_phones = ps_alignment_n_phones(al);
    sas->n_emit_state = ps_alignment_n_states(al);
    sas->hmms = ckd_calloc(sas->n_phones, sizeof(*sas->hmms));
    for (hmm = sas->hmms, itor = ps_alignment_phones(al); itor;
         ++hmm, itor = ps_alignment_iter_next(itor)) {
        ps_alignment_entry_t *ent = ps_alignment_iter_get(itor);
        hmm_init(sas->hmmctx, hmm, FALSE,
                 ent->id.pid.ssid, ent->id.pid.tmatid);
    }
    return ps_search_base(sas);
}
static int
phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
{
    phone_loop_search_t *pls = (phone_loop_search_t *)search;
    cmd_ln_t *config = ps_search_config(search);
    acmod_t *acmod = ps_search_acmod(search);
    int i;

    /* Free old dict2pid, dict, if necessary. */
    ps_search_base_reinit(search, dict, d2p);

    /* Initialize HMM context. */
    if (pls->hmmctx)
        hmm_context_free(pls->hmmctx);
    pls->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
                                   acmod->tmat->tp, NULL, acmod->mdef->sseq);
    if (pls->hmmctx == NULL)
        return -1;

    /* Initialize penalty storage */
    pls->n_phones = bin_mdef_n_ciphone(acmod->mdef);
    pls->window = cmd_ln_int32_r(config, "-pl_window");
    if (pls->penalties)
        ckd_free(pls->penalties);
    pls->penalties = (int32 *)ckd_calloc(pls->n_phones, sizeof(*pls->penalties));
    if (pls->pen_buf)
        ckd_free_2d(pls->pen_buf);
    pls->pen_buf = (int32 **)ckd_calloc_2d(pls->window, pls->n_phones, sizeof(**pls->pen_buf));

    /* Initialize phone HMMs. */
    if (pls->hmms) {
        for (i = 0; i < pls->n_phones; ++i)
            hmm_deinit((hmm_t *)&pls->hmms[i]);
        ckd_free(pls->hmms);
    }
    pls->hmms = (hmm_t *)ckd_calloc(pls->n_phones, sizeof(*pls->hmms));
    for (i = 0; i < pls->n_phones; ++i) {
        hmm_init(pls->hmmctx, (hmm_t *)&pls->hmms[i],
                 FALSE,
                 bin_mdef_pid2ssid(acmod->mdef, i),
                 bin_mdef_pid2tmatid(acmod->mdef, i));
    }
    pls->penalty_weight = cmd_ln_float64_r(config, "-pl_weight");
    pls->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_beam")) >> SENSCR_SHIFT;
    pls->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_pbeam")) >> SENSCR_SHIFT;
    pls->pip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pl_pip")) >> SENSCR_SHIFT;
    E_INFO("State beam %d Phone exit beam %d Insertion penalty %d\n",
           pls->beam, pls->pbeam, pls->pip);

    return 0;
}
Пример #3
0
static void init_hmm_unrelated(args_t *args)
{
    int i,j;
    args->nstates = 7;
    args->tprob   = (double*) malloc(sizeof(double)*args->nstates*args->nstates);

    for (i=0; i<args->nstates; i++)
    {
        for (j=0; j<args->nstates; j++)
            MAT(args->tprob,args->nstates,i,j) = args->pij;
    }
    MAT(args->tprob,args->nstates,UNRL_0101,UNRL_xxxx) = args->pij*args->pij;
    MAT(args->tprob,args->nstates,UNRL_0110,UNRL_xxxx) = args->pij*args->pij;
    MAT(args->tprob,args->nstates,UNRL_x0x0,UNRL_0x0x) = args->pij*args->pij;
    MAT(args->tprob,args->nstates,UNRL_0110,UNRL_0x0x) = args->pij*args->pij;
    MAT(args->tprob,args->nstates,UNRL_x00x,UNRL_0xx0) = args->pij*args->pij;
    MAT(args->tprob,args->nstates,UNRL_0101,UNRL_0xx0) = args->pij*args->pij;
    MAT(args->tprob,args->nstates,UNRL_0101,UNRL_x00x) = args->pij*args->pij;
    MAT(args->tprob,args->nstates,UNRL_0110,UNRL_x0x0) = args->pij*args->pij;
    MAT(args->tprob,args->nstates,UNRL_0110,UNRL_0101) = args->pij*args->pij;

    for (i=0; i<args->nstates; i++)
    {
        for (j=i+1; j<args->nstates; j++)
            MAT(args->tprob,args->nstates,i,j) = MAT(args->tprob,args->nstates,j,i);
    }
    for (i=0; i<args->nstates; i++)
    {
        double sum = 0;
        for (j=0; j<args->nstates; j++)
            if ( i!=j ) sum += MAT(args->tprob,args->nstates,i,j);
        MAT(args->tprob,args->nstates,i,i) = 1 - sum;
    }

    #if 0
    for (i=0; i<args->nstates; i++)
    {
        for (j=0; j<args->nstates; j++)
            fprintf(stderr,"\t%e",MAT(args->tprob,args->nstates,j,i));
        fprintf(stderr,"\n");
    }
    #endif

    args->hmm = hmm_init(args->nstates, args->tprob, 10000);
}
static void
ngram_fwdflat_allocate_1ph(ngram_search_t *ngs)
{
    dict_t *dict = ps_search_dict(ngs);
    int n_words = ps_search_n_words(ngs);
    int i, w;

    /* Allocate single-phone words, since they won't have
     * been allocated for us by fwdtree initialization. */
    ngs->n_1ph_words = 0;
    for (w = 0; w < n_words; w++) {
        if (dict_is_single_phone(dict, w))
            ++ngs->n_1ph_words;
    }
    ngs->single_phone_wid = ckd_calloc(ngs->n_1ph_words,
                                       sizeof(*ngs->single_phone_wid));
    ngs->rhmm_1ph = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->rhmm_1ph));
    i = 0;
    for (w = 0; w < n_words; w++) {
        if (!dict_is_single_phone(dict, w))
            continue;

        /* DICT2PID location */
        ngs->rhmm_1ph[i].ciphone = dict_first_phone(dict, w);
        ngs->rhmm_1ph[i].ci2phone = bin_mdef_silphone(ps_search_acmod(ngs)->mdef);
        hmm_init(ngs->hmmctx, &ngs->rhmm_1ph[i].hmm, TRUE,
                 /* ssid */ bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef,
                                              ngs->rhmm_1ph[i].ciphone),
                 /* tmatid */ bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef,
    						  ngs->rhmm_1ph[i].ciphone));
        ngs->rhmm_1ph[i].next = NULL;
        ngs->word_chan[w] = (chan_t *) &(ngs->rhmm_1ph[i]);
        ngs->single_phone_wid[i] = w;
        i++;
    }
}
Пример #5
0
/*
 * Add the word emitted by the given transition (fsglink) to the given lextree
 * (rooted at root), and return the new lextree root.  (There may actually be
 * several root nodes, maintained in a linked list via fsg_pnode_t.sibling.
 * "root" is the head of this list.)
 * lclist, rclist: sets of left and right context phones for this link.
 * alloc_head: head of a linear list of all allocated pnodes for the parent
 * FSG state, kept elsewhere and updated by this routine.
 */
static fsg_pnode_t *
psubtree_add_trans(fsg_lextree_t *lextree, 
                   fsg_pnode_t * root,
                   fsg_glist_linklist_t **curglist,
                   fsg_link_t * fsglink,
                   int16 *lclist, int16 *rclist,
                   fsg_pnode_t ** alloc_head)
{
    int32 silcipid;             /* Silence CI phone ID */
    int32 pronlen;              /* Pronunciation length */
    int32 wid;                  /* FSG (not dictionary!!) word ID */
    int32 dictwid;              /* Dictionary (not FSG!!) word ID */
    int32 ssid;                 /* Senone Sequence ID */
    int32 tmatid;
    gnode_t *gn;
    fsg_pnode_t *pnode, *pred, *head;
    int32 n_ci, p, lc, rc;
    glist_t lc_pnodelist;       /* Temp pnodes list for different left contexts */
    glist_t rc_pnodelist;       /* Temp pnodes list for different right contexts */
    int32 i, j;
    int n_lc_alloc = 0, n_int_alloc = 0, n_rc_alloc = 0;

    silcipid = bin_mdef_silphone(lextree->mdef);
    n_ci = bin_mdef_n_ciphone(lextree->mdef);

    wid = fsg_link_wid(fsglink);
    assert(wid >= 0);           /* Cannot be a null transition */
    dictwid = dict_wordid(lextree->dict,
                          fsg_model_word_str(lextree->fsg, wid));
    pronlen = dict_pronlen(lextree->dict, dictwid);
    assert(pronlen >= 1);

    assert(lclist[0] >= 0);     /* At least one phonetic context provided */
    assert(rclist[0] >= 0);

    head = *alloc_head;
    pred = NULL;

    if (pronlen == 1) {         /* Single-phone word */
        int ci = dict_first_phone(lextree->dict, dictwid);
        /* Only non-filler words are mpx */
        if (dict_filler_word(lextree->dict, dictwid)) {
            /*
             * Left diphone ID for single-phone words already assumes SIL is right
             * context; only left contexts need to be handled.
             */
            lc_pnodelist = NULL;

            for (i = 0; lclist[i] >= 0; i++) {
                lc = lclist[i];
                ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid);
                tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_first_phone(lextree->dict, dictwid));
                /* Check if this ssid already allocated for some other context */
                for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
                    pnode = (fsg_pnode_t *) gnode_ptr(gn);

                    if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) {
                        /* already allocated; share it for this context phone */
                        fsg_pnode_add_ctxt(pnode, lc);
                        break;
                    }
                }

                if (!gn) {      /* ssid not already allocated */
                    pnode =
                        (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
                    pnode->ctx = lextree->ctx;
                    pnode->next.fsglink = fsglink;
                    pnode->logs2prob =
                        (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT)
                        + lextree->wip + lextree->pip;
                    pnode->ci_ext = dict_first_phone(lextree->dict, dictwid);
                    pnode->ppos = 0;
                    pnode->leaf = TRUE;
                    pnode->sibling = root;      /* All root nodes linked together */
                    fsg_pnode_add_ctxt(pnode, lc);      /* Initially zeroed by calloc above */
                    pnode->alloc_next = head;
                    head = pnode;
                    root = pnode;
                    ++n_lc_alloc;

                    hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);

                    lc_pnodelist =
                        glist_add_ptr(lc_pnodelist, (void *) pnode);
                }
            }
Пример #6
0
/* segment using HMM and then histogram clustering */
void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states, 
					 int histogram_length, int nclusters, int neighbour_limit)
{
	int i, j;
	
	/*****************************/
	if (0) {
	/* try just using the predominant bin number as a 'decoded state' */
	nHMM_states = feature_length + 1;	/* allow a 'zero' state */
	double chroma_thresh = 0.05;
	double maxval;
	int maxbin;
	for (i = 0; i < frames_read; i++)
	{
		maxval = 0;
		for (j = 0; j < feature_length; j++)
		{
			if (features[i][j] > maxval) 
			{
				maxval = features[i][j];
				maxbin = j;
			}				
		}
		if (maxval > chroma_thresh)
			q[i] = maxbin;
		else
			q[i] = feature_length;
	}
	
	}
	if (1) {
	/*****************************/
		
	
	/* scale all the features to 'balance covariances' during HMM training */
	double scale = 10;
	for (i = 0; i < frames_read; i++)
		for (j = 0; j < feature_length; j++)
			features[i][j] *= scale;
	
	/* train an HMM on the features */
	
	/* create a model */
	model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
	
	/* train the model */
	hmm_train(features, frames_read, model);
/*	
	printf("\n\nafter training:\n");
	hmm_print(model);
*/	
	/* decode the hidden state sequence */
	viterbi_decode(features, frames_read, model, q);  
	hmm_close(model);
	
	/*****************************/
	}
	/*****************************/
	
    
/*
	fprintf(stderr, "HMM state sequence:\n");
	for (i = 0; i < frames_read; i++)
		fprintf(stderr, "%d ", q[i]);
	fprintf(stderr, "\n\n");
*/
	
	/* create histograms of states */
	double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double));	/* vector in row major order */
	create_histograms(q, frames_read, nHMM_states, histogram_length, h);
	
	/* cluster the histograms */
	int nbsched = 20;	/* length of inverse temperature schedule */
	double* bsched = (double*) malloc(nbsched*sizeof(double));	/* inverse temperature schedule */
	double b0 = 100;
	double alpha = 0.7;
	bsched[0] = b0;
	for (i = 1; i < nbsched; i++)
		bsched[i] = alpha * bsched[i-1];
	cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
	
	/* now q holds a sequence of cluster assignments */
	
	free(h);  
	free(bsched);
}
/**
 * Build net from phone HMMs
 */
static int
phmm_build(allphone_search_t * allphs)
{
    phmm_t *p, **pid2phmm;
    bin_mdef_t *mdef;
    int32 lrc_size;
    uint32 *lc, *rc;
    s3pid_t pid;
    s3cipid_t ci;
    s3cipid_t *filler;
    int n_phmm, n_link;
    int i, nphone;

    mdef = ((ps_search_t *) allphs)->acmod->mdef;
    allphs->ci_phmm =
        (phmm_t **) ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(phmm_t *));
    pid2phmm =
        (phmm_t **) ckd_calloc(bin_mdef_n_phone(mdef), sizeof(phmm_t *));

    /* For each unique ciphone/triphone entry in mdef, create a PHMM node */
    n_phmm = 0;
    nphone = allphs->ci_only ? bin_mdef_n_ciphone(mdef) : bin_mdef_n_phone(mdef);
    E_INFO("Building PHMM net of %d phones\n", nphone);
    for (pid = 0; pid < nphone; pid++) {
        if ((p = phmm_lookup(allphs, pid)) == NULL) {
            //not found, should be created
            p = (phmm_t *) ckd_calloc(1, sizeof(*p));
            hmm_init(allphs->hmmctx, &(p->hmm), FALSE,
                     mdef_pid2ssid(mdef, pid), mdef->phone[pid].tmat);
            p->pid = pid;
            p->ci = bin_mdef_pid2ci(mdef, pid);
            p->succlist = NULL;
            p->next = allphs->ci_phmm[p->ci];
            allphs->ci_phmm[p->ci] = p;
            n_phmm++;
        }
        pid2phmm[pid] = p;
    }

    /* Fill out bitvecs of each PHMM node, alloc continuous memory chunk for context bitvectors */
    lrc_size = bitvec_size(bin_mdef_n_ciphone(mdef));
    lc = ckd_calloc(n_phmm * 2 * lrc_size, sizeof(bitvec_t));
    rc = lc + (n_phmm * lrc_size);
    for (ci = 0; ci < mdef->n_ciphone; ci++) {
        for (p = allphs->ci_phmm[ci]; p; p = p->next) {
            p->lc = lc;
            lc += lrc_size;
            p->rc = rc;
            rc += lrc_size;
        }
    }

    /* Fill out lc and rc bitmaps (remember to map all fillers to each other!!) */
    filler =
        (s3cipid_t *) ckd_calloc(bin_mdef_n_ciphone(mdef) + 1,
                                 sizeof(s3cipid_t));

    /* Connect fillers */
    i = 0;
    for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++) {
        p = pid2phmm[ci];
        bitvec_set_all(p->lc, bin_mdef_n_ciphone(mdef));
        bitvec_set_all(p->rc, bin_mdef_n_ciphone(mdef));
        if (mdef->phone[ci].info.ci.filler) {
            filler[i++] = ci;
        }
    }
    filler[i] = BAD_S3CIPID;


    /* Loop over cdphones only if ci_only is not set */
    for (pid = bin_mdef_n_ciphone(mdef); pid < nphone;
         pid++) {
        p = pid2phmm[pid];

        if (mdef->phone[mdef->phone[pid].info.cd.ctx[1]].info.ci.filler) {
            for (i = 0; IS_S3CIPID(filler[i]); i++)
                bitvec_set(p->lc, filler[i]);
        }
        else
            bitvec_set(p->lc, mdef->phone[pid].info.cd.ctx[1]);

        if (mdef->phone[mdef->phone[pid].info.cd.ctx[2]].info.ci.filler) {
            for (i = 0; IS_S3CIPID(filler[i]); i++)
                bitvec_set(p->rc, filler[i]);
        }
        else
            bitvec_set(p->rc, mdef->phone[pid].info.cd.ctx[2]);
    }
    ckd_free(pid2phmm);
    ckd_free(filler);

    /* Create links between PHMM nodes */
    n_link = phmm_link(allphs);

    E_INFO("%d nodes, %d links\n", n_phmm, n_link);
    return 0;
}
/**
 * Build HMM network for one utterance of fwdflat search.
 */
static void
build_fwdflat_chan(ngram_search_t *ngs)
{
    int32 i, wid, p;
    root_chan_t *rhmm;
    chan_t *hmm, *prevhmm;
    dict_t *dict;
    dict2pid_t *d2p;

    dict = ps_search_dict(ngs);
    d2p = ps_search_dict2pid(ngs);

    /* Build word HMMs for each word in the lattice. */
    for (i = 0; ngs->fwdflat_wordlist[i] >= 0; i++) {
        wid = ngs->fwdflat_wordlist[i];

        /* Single-phone words are permanently allocated */
        if (dict_is_single_phone(dict, wid))
            continue;

        assert(ngs->word_chan[wid] == NULL);

        /* Multiplex root HMM for first phone (one root per word, flat
         * lexicon).  diphone is irrelevant here, for the time being,
         * at least. */
        rhmm = listelem_malloc(ngs->root_chan_alloc);
        rhmm->ci2phone = dict_second_phone(dict, wid);
        rhmm->ciphone = dict_first_phone(dict, wid);
        rhmm->next = NULL;
        hmm_init(ngs->hmmctx, &rhmm->hmm, TRUE,
                 bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, rhmm->ciphone),
                 bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, rhmm->ciphone));

        /* HMMs for word-internal phones */
        prevhmm = NULL;
        for (p = 1; p < dict_pronlen(dict, wid) - 1; p++) {
            hmm = listelem_malloc(ngs->chan_alloc);
            hmm->ciphone = dict_pron(dict, wid, p);
            hmm->info.rc_id = (p == dict_pronlen(dict, wid) - 1) ? 0 : -1;
            hmm->next = NULL;
            hmm_init(ngs->hmmctx, &hmm->hmm, FALSE,
                     dict2pid_internal(d2p,wid,p), 
		     bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, hmm->ciphone));

            if (prevhmm)
                prevhmm->next = hmm;
            else
                rhmm->next = hmm;

            prevhmm = hmm;
        }

        /* Right-context phones */
        ngram_search_alloc_all_rc(ngs, wid);

        /* Link in just allocated right-context phones */
        if (prevhmm)
            prevhmm->next = ngs->word_chan[wid];
        else
            rhmm->next = ngs->word_chan[wid];
        ngs->word_chan[wid] = (chan_t *) rhmm;
    }

}
Пример #9
0
/*
 * Add the word emitted by the given transition (fsglink) to the given lextree
 * (rooted at root), and return the new lextree root.  (There may actually be
 * several root nodes, maintained in a linked list via fsg_pnode_t.sibling.
 * "root" is the head of this list.)
 * lclist, rclist: sets of left and right context phones for this link.
 * alloc_head: head of a linear list of all allocated pnodes for the parent
 * FSG state, kept elsewhere and updated by this routine.
 */
static fsg_pnode_t *
psubtree_add_trans(fsg_lextree_t *lextree, 
                   fsg_pnode_t * root,
                   fsg_glist_linklist_t **curglist,
                   fsg_link_t * fsglink,
                   int16 *lclist, int16 *rclist,
                   fsg_pnode_t ** alloc_head)
{
    int32 silcipid;             /* Silence CI phone ID */
    int32 pronlen;              /* Pronunciation length */
    int32 wid;                  /* FSG (not dictionary!!) word ID */
    int32 dictwid;              /* Dictionary (not FSG!!) word ID */
    int32 ssid;                 /* Senone Sequence ID */
    gnode_t *gn;
    fsg_pnode_t *pnode, *pred, *head;
    int32 n_ci, p, lc, rc;
    glist_t lc_pnodelist;       /* Temp pnodes list for different left contexts */
    glist_t rc_pnodelist;       /* Temp pnodes list for different right contexts */
    int32 i, j;

    silcipid = bin_mdef_silphone(lextree->mdef);
    n_ci = bin_mdef_n_ciphone(lextree->mdef);

    wid = fsg_link_wid(fsglink);
    assert(wid >= 0);           /* Cannot be a null transition */
    dictwid = dict_wordid(lextree->dict,
                          fsg_model_word_str(lextree->fsg, wid));
    pronlen = dict_pronlen(lextree->dict, dictwid);
    assert(pronlen >= 1);

    assert(lclist[0] >= 0);     /* At least one phonetic context provided */
    assert(rclist[0] >= 0);

    head = *alloc_head;
    pred = NULL;

    if (pronlen == 1) {         /* Single-phone word */
        int ci = dict_first_phone(lextree->dict, dictwid);
        /* Only non-filler words are mpx */
        if (dict_filler_word(lextree->dict, dictwid)) {
            /*
             * Left diphone ID for single-phone words already assumes SIL is right
             * context; only left contexts need to be handled.
             */
            lc_pnodelist = NULL;

            for (i = 0; lclist[i] >= 0; i++) {
                lc = lclist[i];
                ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid);
                /* Check if this ssid already allocated for some other context */
                for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
                    pnode = (fsg_pnode_t *) gnode_ptr(gn);

                    if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) {
                        /* already allocated; share it for this context phone */
                        fsg_pnode_add_ctxt(pnode, lc);
                        break;
                    }
                }

                if (!gn) {      /* ssid not already allocated */
                    pnode =
                        (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
                    pnode->ctx = lextree->ctx;
                    pnode->next.fsglink = fsglink;
                    pnode->logs2prob =
                        fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip;
                    pnode->ci_ext = dict_first_phone(lextree->dict, dictwid);
                    pnode->ppos = 0;
                    pnode->leaf = TRUE;
                    pnode->sibling = root;      /* All root nodes linked together */
                    fsg_pnode_add_ctxt(pnode, lc);      /* Initially zeroed by calloc above */
                    pnode->alloc_next = head;
                    head = pnode;
                    root = pnode;

                    hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext);

                    lc_pnodelist =
                        glist_add_ptr(lc_pnodelist, (void *) pnode);
                }
            }

            glist_free(lc_pnodelist);
        }
        else {                  /* Filler word; no context modelled */
            ssid = bin_mdef_pid2ssid(lextree->mdef, ci); /* probably the same... */

            pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
            pnode->ctx = lextree->ctx;
            pnode->next.fsglink = fsglink;
            pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip;
            pnode->ci_ext = silcipid;   /* Presents SIL as context to neighbors */
            pnode->ppos = 0;
            pnode->leaf = TRUE;
            pnode->sibling = root;
            fsg_pnode_add_all_ctxt(&(pnode->ctxt));
            pnode->alloc_next = head;
            head = pnode;
            root = pnode;

            hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext);
        }
    }
    else {                      /* Multi-phone word */
        fsg_pnode_t **ssid_pnode_map;       /* Temp array of ssid->pnode mapping */
        ssid_pnode_map =
            (fsg_pnode_t **) ckd_calloc(n_ci, sizeof(fsg_pnode_t *));
        lc_pnodelist = NULL;
        rc_pnodelist = NULL;

        for (p = 0; p < pronlen; p++) {
            int ci = dict_pron(lextree->dict, dictwid, p);
            if (p == 0) {       /* Root phone, handle required left contexts */
                /* Find if we already have an lc_pnodelist for the first phone of this word */
		fsg_glist_linklist_t *predglist=*curglist;
		fsg_glist_linklist_t *glist=*curglist;

                rc = dict_pron(lextree->dict, dictwid, 1);
		while (glist && glist->glist && glist->ci != ci && glist->rc != rc){
		    glist = glist->next;
		}
		if (glist && glist->ci == ci && glist->rc == rc && glist->glist) {
		    /* We've found a valid glist. Hook to it and move to next phoneme */
		    lc_pnodelist = glist->glist;
                    /* Set the predecessor node for the future tree first */
		    pred = (fsg_pnode_t *) gnode_ptr(lc_pnodelist);
		    continue;
		}
		else {
		    /* Two cases that can bring us here
		     * a. glist == NULL, i.e. end of current list. Create new entry.
		     * b. glist->glist == NULL, i.e. first entry into list.
		     */
		    if (!glist) { /* Case a; reduce it to case b by allocing glist */
		        glist = (fsg_glist_linklist_t*) ckd_calloc(1, sizeof(fsg_glist_linklist_t));
			glist->next = predglist;
                        *curglist = glist;
		    }
		    glist->ci = ci;
                    glist->rc = rc;
                    glist->lc = -1;
		    lc_pnodelist = glist->glist = NULL; /* Gets created below */
		}

                for (i = 0; lclist[i] >= 0; i++) {
                    lc = lclist[i];
                    ssid = dict2pid_ldiph_lc(lextree->d2p, ci, rc, lc);
                    /* Compression is not done by d2p, so we do it
                     * here.  This might be slow, but it might not
                     * be... we'll see. */
                    pnode = ssid_pnode_map[0];
                    for (j = 0; j < n_ci && ssid_pnode_map[j] != NULL; ++j) {
                        pnode = ssid_pnode_map[j];
                        if (hmm_nonmpx_ssid(&pnode->hmm) == ssid)
                            break;
                    }
                    assert(j < n_ci);
                    if (!pnode) {       /* Allocate pnode for this new ssid */
                        pnode =
                            (fsg_pnode_t *) ckd_calloc(1,
                                                       sizeof
                                                       (fsg_pnode_t));
                        pnode->ctx = lextree->ctx;
	                /* This bit is tricky! For now we'll put the prob in the final link only */
                        /* pnode->logs2prob = fsg_link_logs2prob(fsglink)
                           + lextree->wip + lextree->pip; */
                        pnode->logs2prob = lextree->wip + lextree->pip;
                        pnode->ci_ext = dict_first_phone(lextree->dict, dictwid);
                        pnode->ppos = 0;
                        pnode->leaf = FALSE;
                        pnode->sibling = root;  /* All root nodes linked together */
                        pnode->alloc_next = head;
                        head = pnode;
                        root = pnode;

                        hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext);

                        lc_pnodelist =
                            glist_add_ptr(lc_pnodelist, (void *) pnode);
                        ssid_pnode_map[j] = pnode;
                    }
                    fsg_pnode_add_ctxt(pnode, lc);
                }
		/* Put the lc_pnodelist back into glist */
		glist->glist = lc_pnodelist;

                /* The predecessor node for the future tree is the root */
		pred = root;
            }
            else if (p != pronlen - 1) {        /* Word internal phone */
                fsg_pnode_t    *pnodeyoungest;

                ssid = dict2pid_internal(lextree->d2p, dictwid, p);
	        /* First check if we already have this ssid in our tree */
		pnode = pred->next.succ;
		pnodeyoungest = pnode; /* The youngest sibling */
		while (pnode && (hmm_nonmpx_ssid(&pnode->hmm) != ssid || pnode->leaf)) {
		    pnode = pnode->sibling;
		}
		if (pnode && (hmm_nonmpx_ssid(&pnode->hmm) == ssid && !pnode->leaf)) {
		    /* Found the ssid; go to next phoneme */
		    pred = pnode;
		    continue;
		}

		/* pnode not found, allocate it */
                pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
                pnode->ctx = lextree->ctx;
                pnode->logs2prob = lextree->pip;
                pnode->ci_ext = dict_pron(lextree->dict, dictwid, p);
                pnode->ppos = p;
                pnode->leaf = FALSE;
                pnode->sibling = pnodeyoungest; /* May be NULL */
                if (p == 1) {   /* Predecessor = set of root nodes for left ctxts */
                    for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
                        pred = (fsg_pnode_t *) gnode_ptr(gn);
                        pred->next.succ = pnode;
                    }
                }
                else {          /* Predecessor = word internal node */
                    pred->next.succ = pnode;
                }
                pnode->alloc_next = head;
                head = pnode;

                hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext);

                pred = pnode;
            }
            else {              /* Leaf phone, handle required right contexts */
	        /* Note, leaf phones are not part of the tree */
                xwdssid_t *rssid;
                memset((void *) ssid_pnode_map, 0,
                       n_ci * sizeof(fsg_pnode_t *));
                lc = dict_pron(lextree->dict, dictwid, p-1);
                rssid = dict2pid_rssid(lextree->d2p, ci, lc);

                for (i = 0; rclist[i] >= 0; i++) {
                    rc = rclist[i];

                    j = rssid->cimap[rc];
                    ssid = rssid->ssid[j];
                    pnode = ssid_pnode_map[j];

                    if (!pnode) {       /* Allocate pnode for this new ssid */
                        pnode =
                            (fsg_pnode_t *) ckd_calloc(1,
                                                       sizeof
                                                       (fsg_pnode_t));
                        pnode->ctx = lextree->ctx;
			/* We are plugging the word prob here. Ugly */
                        /* pnode->logs2prob = lextree->pip; */
                        pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->pip;
                        pnode->ci_ext = dict_pron(lextree->dict, dictwid, p);
                        pnode->ppos = p;
                        pnode->leaf = TRUE;
                        pnode->sibling = rc_pnodelist ?
                            (fsg_pnode_t *) gnode_ptr(rc_pnodelist) : NULL;
                        pnode->next.fsglink = fsglink;
                        pnode->alloc_next = head;
                        head = pnode;

                        hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext);

                        rc_pnodelist =
                            glist_add_ptr(rc_pnodelist, (void *) pnode);
                        ssid_pnode_map[j] = pnode;
                    }
                    else {
                        assert(hmm_nonmpx_ssid(&pnode->hmm) == ssid);
                    }
                    fsg_pnode_add_ctxt(pnode, rc);
                }

                if (p == 1) {   /* Predecessor = set of root nodes for left ctxts */
                    for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
                        pred = (fsg_pnode_t *) gnode_ptr(gn);
                        if (!pred->next.succ)
                            pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
                        else {
                            /* Link to the end of the sibling chain */
                            fsg_pnode_t *succ = pred->next.succ;
                            while (succ->sibling) succ = succ->sibling;
                            succ->sibling = (fsg_pnode_t*) gnode_ptr(rc_pnodelist);
                            /* Since all entries of lc_pnodelist point
                               to the same array, sufficient to update it once */
                            break; 
                        }
                    }
                }
                else {          /* Predecessor = word internal node */
                    if (!pred->next.succ)
                        pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
                    else {
                        /* Link to the end of the sibling chain */
                        fsg_pnode_t *succ = pred->next.succ;
                        while (succ->sibling) succ = succ->sibling;
                        succ->sibling = (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
                    }
                }
            }
        }

        ckd_free((void *) ssid_pnode_map);
        /* glist_free(lc_pnodelist);  Nope; this gets freed outside */
        glist_free(rc_pnodelist);
    }

    *alloc_head = head;

    return root;
}
Пример #10
0
static void init_data(args_t *args)
{
    args->prev_rid = args->skip_rid = -1;
    args->hdr = args->files->readers[0].header;

    if ( !args->sample )
    {
        if ( bcf_hdr_nsamples(args->hdr)>1 ) error("Missing the option -s, --sample\n");
        args->sample = strdup(args->hdr->samples[0]);
    }
    if ( !bcf_hdr_nsamples(args->hdr) ) error("No samples in the VCF?\n");

    // Set samples
    kstring_t str = {0,0,0};
    if ( args->estimate_AF && strcmp("-",args->estimate_AF) )
    {
        int i, n;
        char **smpls = hts_readlist(args->estimate_AF, 1, &n);

        // Make sure the query sample is included
        for (i=0; i<n; i++)
            if ( !strcmp(args->sample,smpls[i]) ) break;

        // Add the query sample if not present
        if ( i!=n ) kputs(args->sample, &str);

        for (i=0; i<n; i++)
        {
            if ( str.l ) kputc(',', &str);
            kputs(smpls[i], &str);
            free(smpls[i]);
        }
        free(smpls);
    }
    else if ( !args->estimate_AF )
        kputs(args->sample, &str);

    if ( str.l )
    {
        int ret = bcf_hdr_set_samples(args->hdr, str.s, 0);
        if ( ret<0 ) error("Error parsing the list of samples: %s\n", str.s);
        else if ( ret>0 ) error("The %d-th sample not found in the VCF\n", ret);
    }

    if ( args->af_tag )
        if ( !bcf_hdr_idinfo_exists(args->hdr,BCF_HL_INFO,bcf_hdr_id2int(args->hdr,BCF_DT_ID,args->af_tag)) )
            error("No such INFO tag in the VCF: %s\n", args->af_tag);

    args->nsmpl = bcf_hdr_nsamples(args->hdr);
    args->ismpl = bcf_hdr_id2int(args->hdr, BCF_DT_SAMPLE, args->sample);
    free(str.s);

    int i;
    for (i=0; i<256; i++) args->pl2p[i] = pow(10., -i/10.);

    // Init transition matrix and HMM
    double tprob[4];
    MAT(tprob,2,STATE_HW,STATE_HW) = 1 - args->t2AZ;
    MAT(tprob,2,STATE_HW,STATE_AZ) = args->t2HW;
    MAT(tprob,2,STATE_AZ,STATE_HW) = args->t2AZ;
    MAT(tprob,2,STATE_AZ,STATE_AZ) = 1 - args->t2HW; 

    if ( args->genmap_fname ) 
    {
        args->hmm = hmm_init(2, tprob, 0);
        hmm_set_tprob_func(args->hmm, set_tprob_genmap, args);
    }
    else if ( args->rec_rate > 0 )
    {
        args->hmm = hmm_init(2, tprob, 0);
        hmm_set_tprob_func(args->hmm, set_tprob_recrate, args);

    }
    else
        args->hmm = hmm_init(2, tprob, 10000);

    // print header
    printf("# This file was produced by: bcftools roh(%s+htslib-%s)\n", bcftools_version(),hts_version());
    printf("# The command line was:\tbcftools %s", args->argv[0]);
    for (i=1; i<args->argc; i++)
        printf(" %s",args->argv[i]);
    printf("\n#\n");
    printf("# [1]Chromosome\t[2]Position\t[3]State (0:HW, 1:AZ)\t[4]Quality\n");
}
Пример #11
0
static void init_hmm_trio(args_t *args)
{
    int i,j;
    args->nstates = 8;
    args->tprob   = (double*) malloc(sizeof(double)*args->nstates*args->nstates);

    for (i=0; i<args->nstates; i++)
        for (j=0; j<args->nstates; j++) hap_switch[i][j] = 0;

    hap_switch[TRIO_AD][TRIO_AC] = SW_FATHER;
    hap_switch[TRIO_BC][TRIO_AC] = SW_MOTHER;
    hap_switch[TRIO_BD][TRIO_AC] = SW_MOTHER | SW_FATHER;
    hap_switch[TRIO_AC][TRIO_AD] = SW_FATHER;
    hap_switch[TRIO_BC][TRIO_AD] = SW_MOTHER | SW_FATHER;
    hap_switch[TRIO_BD][TRIO_AD] = SW_MOTHER;
    hap_switch[TRIO_AC][TRIO_BC] = SW_MOTHER;
    hap_switch[TRIO_AD][TRIO_BC] = SW_MOTHER | SW_FATHER;
    hap_switch[TRIO_BD][TRIO_BC] = SW_FATHER;
    hap_switch[TRIO_AC][TRIO_BD] = SW_MOTHER | SW_FATHER;
    hap_switch[TRIO_AD][TRIO_BD] = SW_MOTHER;
    hap_switch[TRIO_BC][TRIO_BD] = SW_FATHER;

    hap_switch[TRIO_DA][TRIO_CA] = SW_FATHER;
    hap_switch[TRIO_CB][TRIO_CA] = SW_MOTHER;
    hap_switch[TRIO_DB][TRIO_CA] = SW_MOTHER | SW_FATHER;
    hap_switch[TRIO_CA][TRIO_DA] = SW_FATHER;
    hap_switch[TRIO_CB][TRIO_DA] = SW_MOTHER | SW_FATHER;
    hap_switch[TRIO_DB][TRIO_DA] = SW_MOTHER;
    hap_switch[TRIO_CA][TRIO_CB] = SW_MOTHER;
    hap_switch[TRIO_DA][TRIO_CB] = SW_MOTHER | SW_FATHER;
    hap_switch[TRIO_DB][TRIO_CB] = SW_FATHER;
    hap_switch[TRIO_CA][TRIO_DB] = SW_MOTHER | SW_FATHER;
    hap_switch[TRIO_DA][TRIO_DB] = SW_MOTHER;
    hap_switch[TRIO_CB][TRIO_DB] = SW_FATHER;

    for (i=0; i<args->nstates; i++)
    {
        for (j=0; j<args->nstates; j++)
        {
            if ( !hap_switch[i][j] ) MAT(args->tprob,args->nstates,i,j) = 0;
            else
            {
                MAT(args->tprob,args->nstates,i,j) = 1;
                if ( hap_switch[i][j] & SW_MOTHER ) MAT(args->tprob,args->nstates,i,j) *= args->pij;
                if ( hap_switch[i][j] & SW_FATHER ) MAT(args->tprob,args->nstates,i,j) *= args->pij;
            }
        }
    }
    for (i=0; i<args->nstates; i++)
    {
        double sum = 0;
        for (j=0; j<args->nstates; j++)
        {
            if ( i!=j ) sum += MAT(args->tprob,args->nstates,i,j);
        }
        MAT(args->tprob,args->nstates,i,i) = 1 - sum;
    }

    #if 0
    for (i=0; i<args->nstates; i++)
    {
        for (j=0; j<args->nstates; j++)
            fprintf(stderr,"\t%d",hap_switch[j][i]);
        fprintf(stderr,"\n");
    }
    for (i=0; i<args->nstates; i++)
    {
        for (j=0; j<args->nstates; j++)
            fprintf(stderr,"\t%e",MAT(args->tprob,args->nstates,j,i));
        fprintf(stderr,"\n");
    }
    #endif

    args->hmm = hmm_init(args->nstates, args->tprob, 10000);
}