/**
 *  hmm_init() - allocate a new HMM object and set its transition probabilities
 *  @nstates:  number of states; stored in the object and used to size two
 *             work arrays of nstates*nstates doubles each
 *  @tprob:    transition probabilities, forwarded unchanged to hmm_set_tprob()
 *  @ntprob:   forwarded unchanged to hmm_set_tprob()
 *
 *  Returns the newly allocated object, or NULL when memory could not be
 *  obtained. On failure nothing is leaked and hmm_set_tprob() is not called.
 */
hmm_t *hmm_init(int nstates, double *tprob, int ntprob)
{
    hmm_t *hmm = calloc(1, sizeof(*hmm));
    if ( !hmm ) return NULL;

    hmm->nstates = nstates;

    // Both work arrays have the same size, compute it once
    size_t nbytes = sizeof(double)*nstates*nstates;
    hmm->curr_tprob = malloc(nbytes);
    hmm->tmp        = malloc(nbytes);

    // malloc(0) is allowed to return NULL, so a NULL result only counts as
    // a failure when a non-zero number of bytes was requested
    if ( nbytes && (!hmm->curr_tprob || !hmm->tmp) )
    {
        free(hmm->curr_tprob);
        free(hmm->tmp);
        free(hmm);
        return NULL;
    }

    hmm_set_tprob(hmm, tprob, ntprob);
    return hmm;
}
/*
 *  flush_viterbi() - run the HMM over the buffered sites and print the
 *  per-site state calls to stdout.
 *
 *  Returns immediately when no sites are buffered (args->nsites is zero).
 *
 *  Without args->vi_training: a single Viterbi pass plus a forward-backward
 *  pass over all buffered sites. Each output line holds the sequence name of
 *  args->prev_rid, the site position plus one, the state (1 when the Viterbi
 *  path entry equals STATE_AZ, 0 otherwise) and phred_score() of one minus
 *  the forward-backward value of that state.
 *
 *  With args->vi_training: the 2x2 transition matrix is repeatedly
 *  re-estimated from the transitions observed on the Viterbi paths of all
 *  buffered chromosomes, until the entries MAT(.,2,1,0) and MAT(.,2,0,1)
 *  no longer change between iterations. Progress is logged to pysamerr.
 *  The sites are then decoded once more and printed with ".." in place of
 *  the score column.
 *
 *  The Viterbi path, the forward-backward values and eprob are all addressed
 *  with a stride of two entries per site.
 */
static void flush_viterbi(args_t *args)
{
    if ( !args->nsites ) return;

    if ( !args->vi_training )
    {
        // Single Viterbi pass, one chromosome
        hmm_run_viterbi(args->hmm, args->nsites, args->eprob, args->sites);
        hmm_run_fwd_bwd(args->hmm, args->nsites, args->eprob, args->sites);
        double *post = hmm_get_fwd_bwd_prob(args->hmm);

        const char *seq_name = bcf_hdr_id2name(args->hdr, args->prev_rid);
        uint8_t *path = hmm_get_viterbi_path(args->hmm);

        for (int isite = 0; isite < args->nsites; isite++)
        {
            int is_az = ( path[2*isite] == STATE_AZ );
            double p_state = post[2*isite + is_az];
            printf("%s\t%d\t%d\t%.1f\n", seq_name, args->sites[isite]+1, is_az, phred_score(1.0-p_state));
        }
        return;
    }

    // Viterbi training, multiple chromosomes
    int niter = 0;
    for (;;)
    {
        // Remember the two tracked matrix entries so that the change made
        // by this iteration can be measured afterwards
        double *tprob_arr = hmm_get_tprob(args->hmm);
        double prev_t2az = MAT(tprob_arr,2,1,0);
        double prev_t2hw = MAT(tprob_arr,2,0,1);

        double tcounts[] = { 0,0,0,0 };

        // Run Viterbi for each chromosome. eprob and sites contain multiple
        // chromosomes, rid_offs mark the boundaries
        for (int irid = 0; irid < args->nrids; irid++)
        {
            int beg = args->rid_offs[irid];
            int len = (irid+1 != args->nrids ? args->rid_offs[irid+1] : args->nsites) - beg;

            hmm_run_viterbi(args->hmm, len, &args->eprob[beg*2], &args->sites[beg]);

            // Add the transitions observed on this path to the total counts
            uint8_t *path = hmm_get_viterbi_path(args->hmm);
            for (int k = 1; k < len; k++)
            {
                int from_state = path[2*(k-1)];
                int to_state   = path[2*k];
                MAT(tcounts,2,to_state,from_state) += 1;
            }
        }

        // Turn the counts into the new transition matrix: for each value of
        // the first MAT index, divide by the sum over the second index
        for (int si = 0; si < 2; si++)
        {
            int total = 0;
            for (int sj = 0; sj < 2; sj++) total += MAT(tcounts,2,si,sj);
            if ( !total ) error("fixme: state %d not observed\n", si+1);
            for (int sj = 0; sj < 2; sj++) MAT(tcounts,2,si,sj) /= total;
        }

        // The last argument is 0 when a genetic map file or a positive
        // recombination rate was given, 10000 otherwise
        int have_map_or_rate = ( args->genmap_fname || args->rec_rate > 0 );
        hmm_set_tprob(args->hmm, tcounts, have_map_or_rate ? 0 : 10000);

        tprob_arr = hmm_get_tprob(args->hmm);
        double deltaz = fabs(MAT(tprob_arr,2,1,0) - prev_t2az);
        double delthw = fabs(MAT(tprob_arr,2,0,1) - prev_t2hw);
        niter++;
        fprintf(pysamerr,"%d: %f %f\n", niter,deltaz,delthw);

        // Stop once neither tracked entry changed. Written as negated
        // "greater than" tests on purpose, so that a NaN delta also stops
        if ( !(deltaz > 0.0) && !(delthw > 0.0) ) break;
    }

    fprintf(pysamerr, "Viterbi training converged in %d iterations to", niter);
    double *trained = hmm_get_tprob(args->hmm);
    for (int si = 0; si < 2; si++)
    {
        for (int sj = 0; sj < 2; sj++)
            fprintf(pysamerr, " %f", MAT(trained,2,si,sj));
    }
    fprintf(pysamerr, "\n");

    // Output the results, decoding each chromosome with the trained matrix
    for (int irid = 0; irid < args->nrids; irid++)
    {
        int beg = args->rid_offs[irid];
        int len = (irid+1 != args->nrids ? args->rid_offs[irid+1] : args->nsites) - beg;

        hmm_run_viterbi(args->hmm, len, &args->eprob[beg*2], &args->sites[beg]);
        uint8_t *path = hmm_get_viterbi_path(args->hmm);
        const char *seq_name = bcf_hdr_id2name(args->hdr, args->rids[irid]);

        for (int k = 0; k < len; k++)
        {
            int is_az = ( path[2*k] == STATE_AZ );
            printf("%s\t%d\t%d\t..\n", seq_name, args->sites[beg+k]+1, is_az);
        }
    }
}