Ejemplo n.º 1
0
/**
 * Allocate a new HMM with nstates states and install its transition
 * probabilities.
 *
 * @param nstates  number of states; two nstates x nstates double buffers
 *                 (curr_tprob and tmp) are allocated for it
 * @param tprob    transition probabilities, forwarded to hmm_set_tprob()
 * @param ntprob   forwarded unchanged to hmm_set_tprob()
 *
 * @return the new, caller-owned HMM, or NULL when nstates is not positive,
 *         the buffer size would overflow size_t, or an allocation fails.
 *         Nothing is leaked on the failure paths.
 */
hmm_t *hmm_init(int nstates, double *tprob, int ntprob)
{
    if ( nstates <= 0 ) return NULL;

    // Refuse sizes whose byte count (nstates*nstates*sizeof(double)) would wrap around
    size_t side = (size_t) nstates;
    if ( side > ((size_t)-1 / sizeof(double)) / side ) return NULL;
    size_t nbytes = sizeof(double) * side * side;

    // calloc leaves every field not set below zeroed
    hmm_t *hmm = calloc(1, sizeof(*hmm));
    if ( !hmm ) return NULL;

    hmm->nstates = nstates;
    hmm->curr_tprob = malloc(nbytes);
    hmm->tmp = malloc(nbytes);
    if ( !hmm->curr_tprob || !hmm->tmp )
    {
        // free(NULL) is a no-op, so whichever buffer failed is harmless here
        free(hmm->curr_tprob);
        free(hmm->tmp);
        free(hmm);
        return NULL;
    }

    hmm_set_tprob(hmm, tprob, ntprob);

    return hmm;
}
Ejemplo n.º 2
0
/*
 * Run the HMM over the buffered sites and print one line per site to stdout.
 *
 * Without args->vi_training: a single Viterbi pass plus a forward-backward
 * pass over all buffered sites; each line carries the chromosome name, the
 * 1-based position, the 0/1 state (1 when the path state equals STATE_AZ)
 * and phred_score() of one minus the forward-backward value of that state.
 *
 * With args->vi_training: the 2x2 transition matrix is re-estimated from the
 * transitions seen on the Viterbi paths of all buffered chromosomes, and this
 * is repeated until neither tracked entry of the matrix changes any more.
 * The final paths are then printed with ".." in the last column.
 *
 * Does nothing when no sites are buffered.
 */
static void flush_viterbi(args_t *args)
{
    if ( !args->nsites ) return;

    if ( !args->vi_training )
    {
        // Single pass over one chromosome: decode, then get per-site probabilities
        hmm_run_viterbi(args->hmm, args->nsites, args->eprob, args->sites);
        hmm_run_fwd_bwd(args->hmm, args->nsites, args->eprob, args->sites);
        double *fb_prob = hmm_get_fwd_bwd_prob(args->hmm);

        const char *seq_name = bcf_hdr_id2name(args->hdr,args->prev_rid);
        uint8_t *path = hmm_get_viterbi_path(args->hmm);
        for (int isite = 0; isite < args->nsites; isite++)
        {
            // Both arrays are indexed with a stride of two entries per site
            int is_az = 0;
            if ( path[2*isite]==STATE_AZ ) is_az = 1;
            double p_state = fb_prob[2*isite + is_az];
            printf("%s\t%d\t%d\t%.1f\n", seq_name,args->sites[isite]+1, is_az, phred_score(1.0-p_state));
        }
        return;
    }

    // Viterbi training across all buffered chromosomes
    int niter = 0;
    for (;;)
    {
        // Remember the two tracked matrix entries so the change can be measured below
        double *trans = hmm_get_tprob(args->hmm);
        double prev_to_az = MAT(trans,2,1,0);
        double prev_to_hw = MAT(trans,2,0,1);

        double tcounts[] = { 0,0,0,0 };
        for (int irid = 0; irid < args->nrids; irid++)
        {
            // eprob and sites hold several chromosomes back to back;
            // rid_offs[irid] is where chromosome irid starts
            int first = args->rid_offs[irid];
            int nchr_sites = (irid+1!=args->nrids ? args->rid_offs[irid+1] : args->nsites) - first;
            hmm_run_viterbi(args->hmm, nchr_sites, args->eprob+first*2, args->sites+first);

            // Tally every transition between consecutive sites on the decoded path
            uint8_t *path = hmm_get_viterbi_path(args->hmm);
            for (int k = 1; k < nchr_sites; k++)
            {
                int from_state = path[2*(k-1)];
                int to_state   = path[2*k];
                MAT(tcounts,2,to_state,from_state) += 1;
            }
        }

        // Turn the counts into probabilities, one matrix row at a time
        for (int row = 0; row < 2; row++)
        {
            int row_total = 0;
            row_total += MAT(tcounts,2,row,0);
            row_total += MAT(tcounts,2,row,1);
            if ( !row_total ) error("fixme: state %d not observed\n", row+1);
            MAT(tcounts,2,row,0) /= row_total;
            MAT(tcounts,2,row,1) /= row_total;
        }

        // The last argument is 0 when a genetic map or a recombination rate is in use
        int ntprob = ( args->genmap_fname || args->rec_rate > 0 ) ? 0 : 10000;
        hmm_set_tprob(args->hmm, tcounts, ntprob);

        trans = hmm_get_tprob(args->hmm);
        double deltaz = fabs(MAT(trans,2,1,0)-prev_to_az);
        double delthw = fabs(MAT(trans,2,0,1)-prev_to_hw);
        niter++;

        fprintf(pysamerr,"%d: %f %f\n", niter,deltaz,delthw);

        // Stop once neither tracked entry moved
        if ( !(deltaz > 0.0 || delthw > 0.0) ) break;
    }

    fprintf(pysamerr, "Viterbi training converged in %d iterations to", niter);
    double *final_trans = hmm_get_tprob(args->hmm);
    for (int row = 0; row < 2; row++)
    {
        for (int col = 0; col < 2; col++)
            fprintf(pysamerr, " %f", MAT(final_trans,2,row,col));
    }
    fprintf(pysamerr, "\n");

    // Decode each chromosome once more with the trained matrix and print the calls
    for (int irid = 0; irid < args->nrids; irid++)
    {
        int first = args->rid_offs[irid];
        int nchr_sites = (irid+1!=args->nrids ? args->rid_offs[irid+1] : args->nsites) - first;
        hmm_run_viterbi(args->hmm, nchr_sites, args->eprob+first*2, args->sites+first);
        uint8_t *path = hmm_get_viterbi_path(args->hmm);

        const char *seq_name = bcf_hdr_id2name(args->hdr,args->rids[irid]);
        for (int k = 0; k < nchr_sites; k++)
        {
            int is_az = path[2*k]==STATE_AZ ? 1 : 0;
            printf("%s\t%d\t%d\t..\n", seq_name,args->sites[first+k]+1,is_az);
        }
    }
}