/*
 *  Decode the buffered sites with the two-state HMM and print one line per
 *  site to stdout: chromosome, 1-based position and 1/0 for STATE_AZ or not.
 *
 *  Without args->vi_training the buffer is treated as a single chromosome
 *  (named via args->prev_rid); Viterbi and forward-backward are both run and
 *  the fourth column is phred_score(1 - P) for the called state.
 *
 *  With args->vi_training the buffer holds args->nrids chromosomes delimited
 *  by args->rid_offs. The transition matrix is re-estimated from the Viterbi
 *  paths until two of its entries stop changing, progress is logged to
 *  pysamerr, and the final calls are printed with ".." in the fourth column.
 *
 *  Does nothing when no sites are buffered.
 */
static void flush_viterbi(args_t *args)
{
    int isite, irid, row, col;

    if ( args->nsites == 0 ) return;

    if ( !args->vi_training )
    {
        // single viterbi pass, one chromosome
        hmm_run_viterbi(args->hmm, args->nsites, args->eprob, args->sites);
        hmm_run_fwd_bwd(args->hmm, args->nsites, args->eprob, args->sites);
        double *posterior = hmm_get_fwd_bwd_prob(args->hmm);
        const char *chrom = bcf_hdr_id2name(args->hdr,args->prev_rid);
        uint8_t *path = hmm_get_viterbi_path(args->hmm);

        for (isite=0; isite<args->nsites; isite++)
        {
            // two entries per site in both the path and the probability arrays
            double *site_prob = posterior + isite*2;
            int is_az = 0;
            if ( path[isite*2]==STATE_AZ ) is_az = 1;
            printf("%s\t%d\t%d\t%.1f\n", chrom,args->sites[isite]+1, is_az, phred_score(1.0-site_prob[is_az]));
        }
        return;
    }

    // viterbi training, multiple chromosomes
    double last_t2az, last_t2hw;
    double diff_az, diff_hw;
    int niter = 0;

    for (;;)
    {
        // remember the two tracked transition entries before re-estimation
        double *tprob = hmm_get_tprob(args->hmm);
        last_t2az = MAT(tprob,2,1,0);
        last_t2hw = MAT(tprob,2,0,1);

        double counts[4] = { 0,0,0,0 };
        for (irid=0; irid<args->nrids; irid++)
        {
            // eprob and sites hold all chromosomes back to back; rid_offs
            // gives the index of the first site of each chromosome
            int beg = args->rid_offs[irid];
            int end;
            if ( irid+1==args->nrids )
                end = args->nsites;
            else
                end = args->rid_offs[irid+1];
            int nchr_sites = end - beg;

            hmm_run_viterbi(args->hmm, nchr_sites, args->eprob+beg*2, args->sites+beg);

            // tally the transitions seen along the decoded path
            uint8_t *path = hmm_get_viterbi_path(args->hmm);
            for (isite=1; isite<nchr_sites; isite++)
            {
                int from = path[2*(isite-1)];
                int to   = path[2*isite];
                MAT(counts,2,to,from) += 1;
            }
        }

        // turn the counts into the new transition matrix
        for (row=0; row<2; row++)
        {
            int total = 0;
            for (col=0; col<2; col++) total += MAT(counts,2,row,col);
            if ( !total ) error("fixme: state %d not observed\n", row+1);
            for (col=0; col<2; col++) MAT(counts,2,row,col) /= total;
        }

        int ntprob = ( args->genmap_fname || args->rec_rate > 0 ) ? 0 : 10000;
        hmm_set_tprob(args->hmm, counts, ntprob);

        // how far did the tracked entries move in this round?
        tprob   = hmm_get_tprob(args->hmm);
        diff_az = fabs(MAT(tprob,2,1,0)-last_t2az);
        diff_hw = fabs(MAT(tprob,2,0,1)-last_t2hw);
        niter++;
        fprintf(pysamerr,"%d: %f %f\n", niter,diff_az,diff_hw);

        // stop once neither entry changed
        if ( !(diff_az > 0.0 || diff_hw > 0.0) ) break;
    }

    fprintf(pysamerr, "Viterbi training converged in %d iterations to", niter);
    double *final_tprob = hmm_get_tprob(args->hmm);
    for (row=0; row<2; row++)
    {
        for (col=0; col<2; col++)
            fprintf(pysamerr, " %f", MAT(final_tprob,2,row,col));
    }
    fprintf(pysamerr, "\n");

    // output the results
    for (irid=0; irid<args->nrids; irid++)
    {
        int beg = args->rid_offs[irid];
        int end;
        if ( irid+1==args->nrids )
            end = args->nsites;
        else
            end = args->rid_offs[irid+1];
        int nchr_sites = end - beg;

        hmm_run_viterbi(args->hmm, nchr_sites, args->eprob+beg*2, args->sites+beg);
        uint8_t *path = hmm_get_viterbi_path(args->hmm);
        const char *chrom = bcf_hdr_id2name(args->hdr,args->rids[irid]);

        for (isite=0; isite<nchr_sites; isite++)
        {
            int is_az = 0;
            if ( path[isite*2]==STATE_AZ ) is_az = 1;
            printf("%s\t%d\t%d\t..\n", chrom,args->sites[beg+isite]+1,is_az);
        }
    }
}
/*
 *  Run Viterbi over the buffered sites of the current chromosome
 *  (args->prev_rid) and append the result to "<prefix>.dat":
 *
 *    SG  chr  start  end  label  label   one record per run of identical states
 *    SW  sample  chr  nHets  nSwitches  rate   haplotype switch summary
 *
 *  The output file is opened, and its two header lines written, on the first
 *  call. On return the site buffer and the het counters are reset
 *  (args->nsites, args->nhet_mother, args->nhet_father set to 0).
 *
 *  Sample roles: in C_UNRL mode s1/s2 are isample/jsample; in C_TRIO mode
 *  s1 is the mother, s3 the father and s2 the child.
 */
void flush_viterbi(args_t *args)
{
    // Start from NULL so that no pointer is left indeterminate when the mode
    // is neither C_UNRL nor C_TRIO.
    const char *s1 = NULL, *s2 = NULL, *s3 = NULL;
    if ( args->mode==C_UNRL )
    {
        s1 = bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,args->isample);
        s2 = bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,args->jsample);
    }
    else if ( args->mode==C_TRIO )
    {
        s1 = bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,args->imother);
        s3 = bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,args->ifather);
        s2 = bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,args->ichild);
    }

    if ( !args->fp )
    {
        // First call: create <prefix>.dat and describe the record layouts
        kstring_t str = {0,0,0};
        kputs(args->prefix, &str);
        kputs(".dat", &str);
        args->fp = fopen(str.s,"w");
        if ( !args->fp ) error("%s: %s\n", str.s,strerror(errno));
        free(str.s);
        fprintf(args->fp,"# SG, shared segment\t[2]Chromosome\t[3]Start\t[4]End\t[5]%s:1\t[6]%s:2\n",s2,s2);
        // Column numbers follow the SW records written at the end of this function
        fprintf(args->fp,"# SW, number of switches\t[2]Sample\t[3]Chromosome\t[4]nHets\t[5]nSwitches\t[6]switch rate\n");
    }

    hmm_run_viterbi(args->hmm,args->nsites,args->eprob,args->sites);
    uint8_t *vpath = hmm_get_viterbi_path(args->hmm);
    int i, iprev = -1, prev_state = -1, nstates = hmm_get_nstates(args->hmm);
    int nswitch_mother = 0, nswitch_father = 0;
    for (i=0; i<args->nsites; i++)
    {
        // The path is read with a stride of nstates entries per site
        int state = vpath[i*nstates];
        if ( state!=prev_state || i+1==args->nsites )
        {
            // A run of prev_state ends here (or the buffer does): report it.
            // NOTE(review): at the last site the reported end is sites[i-1],
            // and a state change exactly at the last site leaves that final
            // one-site run unreported. Kept as is; confirm whether intended.
            uint32_t start = iprev>=0 ? args->sites[iprev]+1 : 1, end = i>0 ? args->sites[i-1] : 1;
            const char *chr = bcf_hdr_id2name(args->hdr,args->prev_rid);
            if ( args->mode==C_UNRL )
            {
                switch (prev_state)
                {
                    case UNRL_0x0x: fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:1\t-\n", chr,start,end,s1); break;
                    case UNRL_0xx0: fprintf(args->fp,"SG\t%s\t%d\t%d\t-\t%s:1\n", chr,start,end,s1); break;
                    case UNRL_x00x: fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:2\t-\n", chr,start,end,s1); break;
                    case UNRL_x0x0: fprintf(args->fp,"SG\t%s\t%d\t%d\t-\t%s:2\n", chr,start,end,s1); break;
                    case UNRL_0101: fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:1\t%s:2\n", chr,start,end,s1,s1); break;
                    case UNRL_0110: fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:2\t%s:1\n", chr,start,end,s1,s1); break;
                    default: break;     // no previous state yet, or a state without a segment record
                }
            }
            else if ( args->mode==C_TRIO )
            {
                switch (prev_state)
                {
                    case TRIO_AC: fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:1\t%s:1\n", chr,start,end,s1,s3); break;
                    case TRIO_AD: fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:1\t%s:2\n", chr,start,end,s1,s3); break;
                    case TRIO_BC: fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:2\t%s:1\n", chr,start,end,s1,s3); break;
                    case TRIO_BD: fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:2\t%s:2\n", chr,start,end,s1,s3); break;
                    case TRIO_CA: fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:1\t%s:1\n", chr,start,end,s3,s1); break;
                    case TRIO_DA: fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:2\t%s:1\n", chr,start,end,s3,s1); break;
                    case TRIO_CB: fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:1\t%s:2\n", chr,start,end,s3,s1); break;
                    case TRIO_DB: fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:2\t%s:2\n", chr,start,end,s3,s1); break;
                    default: break;     // no previous state yet, or a state without a segment record
                }
                // At the first site prev_state is still -1: there is no
                // transition to classify and -1 must not index hap_switch.
                if ( prev_state>=0 )
                {
                    if ( hap_switch[state][prev_state] & SW_MOTHER ) nswitch_mother++;
                    if ( hap_switch[state][prev_state] & SW_FATHER ) nswitch_father++;
                }
            }
            iprev = i-1;
        }
        prev_state = state;
    }

    // Switch rate = switches per interval between consecutive het sites
    float mrate = args->nhet_mother>1 ? (float)nswitch_mother/(args->nhet_mother-1) : 0;
    float frate = args->nhet_father>1 ? (float)nswitch_father/(args->nhet_father-1) : 0;
    fprintf(args->fp,"SW\t%s\t%s\t%d\t%d\t%f\n", s1,bcf_hdr_id2name(args->hdr,args->prev_rid),args->nhet_mother,nswitch_mother,mrate);
    // s3 is set only in trio mode; a NULL pointer must not be passed to %s
    if ( s3 )
        fprintf(args->fp,"SW\t%s\t%s\t%d\t%d\t%f\n", s3,bcf_hdr_id2name(args->hdr,args->prev_rid),args->nhet_father,nswitch_father,frate);

    // Reset the per-chromosome buffers for the next batch of sites
    args->nsites = 0;
    args->nhet_father = args->nhet_mother = 0;
}