/*
 *  process() - per-record entry point.
 *
 *  Tracks the sequence id (rid) of the incoming records in the file-level
 *  `args`. When a record arrives on a different sequence than the previous
 *  one, flush_viterbi() is run first; after that the record is passed to the
 *  args.set_observed_prob callback.
 *
 *  Always returns NULL.
 *  NOTE(review): presumably NULL tells the caller not to output the record -
 *  confirm against the plugin host.
 */
bcf1_t *process(bcf1_t *rec)
{
    // prev_rid is -1 until the very first record has been seen
    const int is_first_record = ( args.prev_rid == -1 );

    // Sequence changed: flush before anything from the new sequence is added
    if ( !is_first_record && args.prev_rid != rec->rid )
    {
        flush_viterbi(&args);
    }

    args.prev_rid = rec->rid;
    args.set_observed_prob(rec);

    return NULL;
}
/*
 *  destroy() - final clean-up.
 *
 *  Runs one last flush_viterbi() on the file-level `args`, closes args.fp,
 *  frees the heap buffers held in `args` and destroys the HMM object.
 */
void destroy(void)
{
    // The flush must happen while the output stream and the buffers still exist
    flush_viterbi(&args);
    fclose(args.fp);

    // Release the buffers in the same order as before: gt_arr, tprob, sites, eprob
    void *owned[] = { args.gt_arr, args.tprob, args.sites, args.eprob };
    int i, nowned = (int)(sizeof(owned) / sizeof(owned[0]));
    for (i = 0; i < nowned; i++)
    {
        free(owned[i]);
    }

    hmm_destroy(args.hmm);
}
/*
 *  vcfroh() - add one VCF/BCF record to the buffered list of sites.
 *
 *  @args:  program state; site buffers, counters and position tracking are
 *          updated in place
 *  @line:  the record to process, or NULL to signal the end of input
 *
 *  A NULL @line only triggers flush_viterbi(). Otherwise the record is
 *  counted in args->ntot and then dropped unless it is biallelic, passes the
 *  optional SNP-only filter and lies on a sequence for which load_genmap()
 *  returned zero. On a change of sequence the buffered sites are flushed and
 *  args->nsites is reset, except in vi_training mode where the new rid is
 *  registered with push_rid() instead. Records that parse_line() accepts are
 *  counted in args->nused; their position is appended to args->sites and
 *  their two emission probabilities to args->eprob.
 *
 *  Calls error() when positions go backwards within a sequence or when the
 *  emission probability buffer cannot be enlarged.
 */
static void vcfroh(args_t *args, bcf1_t *line)
{
    // Are we done?
    if ( !line )
    {
        flush_viterbi(args);
        return;
    }
    args->ntot++;

    // Skip unwanted lines
    if ( line->rid == args->skip_rid ) return;      // sequence already reported as having no genmap
    if ( line->n_allele!=2 ) return;                // only biallelic sites; also drops sites with no ALT allele (n_allele==1)
    if ( args->snps_only && !bcf_is_snp(line) ) return;

    // Initialize genetic map
    int skip_rid = 0;
    if ( args->prev_rid<0 )
    {
        // Very first usable record
        args->prev_rid = line->rid;
        args->prev_pos = line->pos;
        skip_rid = load_genmap(args, line);
        if ( !skip_rid && args->vi_training ) push_rid(args, line->rid);
    }

    // New chromosome?
    if ( args->prev_rid!=line->rid )
    {
        skip_rid = load_genmap(args, line);
        if ( args->vi_training )
        {
            // In training mode the sites are kept; only the new rid is registered
            if ( !skip_rid ) push_rid(args, line->rid);
        }
        else
        {
            flush_viterbi(args);
            args->nsites = 0;
        }
        args->prev_rid = line->rid;
        args->prev_pos = line->pos;
    }

    if ( skip_rid )
    {
        fprintf(pysamerr,"Skipping the sequence, no genmap for %s\n", bcf_seqname(args->hdr,line));
        args->skip_rid = line->rid;     // later records on this sequence return early above
        return;
    }

    if ( args->prev_pos > line->pos ) error("The file is not sorted?!\n");

    args->prev_rid = line->rid;
    args->prev_pos = line->pos;

    // Ready for the new site: make room for one more position and, whenever
    // the capacity changed, for the matching pair of emission probabilities.
    // This happens before parse_line(), so a rejected record may still leave
    // the buffers enlarged (but nsites untouched).
    int m = args->msites;
    hts_expand(uint32_t,args->nsites+1,args->msites,args->sites);
    if ( args->msites!=m )
    {
        // Go through a temporary so the old buffer is not lost and a failed
        // allocation is reported instead of being dereferenced below
        double *grown = realloc(args->eprob,sizeof(double)*args->msites*2);
        if ( !grown ) error("Could not allocate memory for emission probabilities\n");
        args->eprob = grown;
    }

    // Set likelihoods and alternate allele frequencies
    double alt_freq, pdg[3];
    if ( parse_line(args, line, &alt_freq, pdg)<0 ) return; // something went wrong

    args->nused++;

    // Calculate emission probabilities P(D|AZ) and P(D|HW); two values are
    // stored per site, indexed by STATE_AZ and STATE_HW
    double ref_freq = 1 - alt_freq;
    double *eprob = &args->eprob[2*args->nsites];
    eprob[STATE_AZ] = pdg[0]*ref_freq + pdg[2]*alt_freq;
    eprob[STATE_HW] = pdg[0]*ref_freq*ref_freq + 2*pdg[1]*ref_freq*alt_freq + pdg[2]*alt_freq*alt_freq;

    args->sites[args->nsites] = line->pos;
    args->nsites++;
}