/*
 * Walk the multi-pileup iterator in conf->iter and, for every retained
 * position, write one record built with ref16 set from the reference base
 * and, when indel calling is enabled, a second record built with ref16 = -1.
 *
 *   conf     pileup configuration and working buffers
 *   beg,end  positions outside [beg,end] are skipped; the range check is
 *            disabled when end is 0
 *
 * Always returns 0; the final return value of bam_mplp_auto() is not
 * propagated to the caller.
 */
static int mpileup_reg(mplp_conf_t *conf, uint32_t beg, uint32_t end)
{
    bam_hdr_t *hdr = conf->mplp_data[0]->h; // header of first file in input list

    int ret, i, tid, pos, ref_len;
    char *ref;

    while ( (ret=bam_mplp_auto(conf->iter, &tid, &pos, conf->n_plp, conf->plp)) > 0)
    {
        if ( end && (pos<beg || pos>end) ) continue;
        if ( conf->bed && tid >= 0 )
        {
            int overlap = regidx_overlap(conf->bed, hdr->target_name[tid], pos, pos, NULL);
            // bed_logic==0 inverts the test: keep only positions with no overlap
            if ( !conf->bed_logic ) overlap = overlap ? 0 : 1;
            if ( !overlap ) continue;
        }
        mplp_get_ref(conf->mplp_data[0], tid, &ref, &ref_len);

        int total_depth, ref0, ref16;
        for (i = total_depth = 0; i < conf->nfiles; ++i) total_depth += conf->n_plp[i];
        group_smpl(conf->gplp, conf->bsmpl, conf->nfiles, conf->n_plp, conf->plp);

        // Go through unsigned char so that a byte >= 0x80 in the reference
        // cannot produce a negative index into seq_nt16_table.
        ref0 = (ref && pos < ref_len)? (unsigned char)ref[pos] : 'N';
        ref16 = seq_nt16_table[ref0];

        bcf_callaux_clean(conf->bca, &conf->bc);
        for (i = 0; i < conf->gplp->n; ++i)
            bcf_call_glfgen(conf->gplp->n_plp[i], conf->gplp->plp[i], ref16, conf->bca, conf->bcr + i);
        conf->bc.tid = tid; conf->bc.pos = pos;
        bcf_call_combine(conf->gplp->n, conf->bcr, conf->bca, ref16, &conf->bc);
        bcf_clear1(conf->bcf_rec);
        bcf_call2bcf(&conf->bc, conf->bcf_rec, conf->bcr, conf->fmt_flag, 0, 0);
        flush_bcf_records(conf, conf->bcf_fp, conf->bcf_hdr, conf->bcf_rec);

        // call indels; todo: subsampling with total_depth>max_indel_depth instead of ignoring?
        // check me: rghash in bcf_call_gap_prep() should have no effect, reads mplp_func already excludes them
        if (!(conf->flag&MPLP_NO_INDEL) && total_depth < conf->max_indel_depth
            && bcf_call_gap_prep(conf->gplp->n, conf->gplp->n_plp, conf->gplp->plp, pos, conf->bca, ref) >= 0)
        {
            bcf_callaux_clean(conf->bca, &conf->bc);
            for (i = 0; i < conf->gplp->n; ++i)
                bcf_call_glfgen(conf->gplp->n_plp[i], conf->gplp->plp[i], -1, conf->bca, conf->bcr + i);
            if (bcf_call_combine(conf->gplp->n, conf->bcr, conf->bca, -1, &conf->bc) >= 0)
            {
                bcf_clear1(conf->bcf_rec);
                bcf_call2bcf(&conf->bc, conf->bcf_rec, conf->bcr, conf->fmt_flag, conf->bca, ref);
                flush_bcf_records(conf, conf->bcf_fp, conf->bcf_hdr, conf->bcf_rec);
            }
        }
    }
    return 0;
}
/*
 * Overwrite with 'N' every base of seq[0..len-1] that falls inside a region
 * of args->mask.  The buffer is taken to cover the coordinates
 * [args->fa_src_pos - len, args->fa_src_pos] on the sequence named by
 * args->rid; mask regions are clipped to the buffer before being applied.
 */
static void mask_region(args_t *args, char *seq, int len)
{
    char *seqname = (char*)bcf_hdr_id2name(args->hdr,args->rid);
    int win_beg = args->fa_src_pos - len;
    int win_end = args->fa_src_pos;

    regitr_t itr;
    if ( !regidx_overlap(args->mask, seqname,win_beg,win_end, &itr) )
        return;     // nothing to mask in this window

    for ( ; REGITR_OVERLAP(itr,win_beg,win_end); itr.i++ )
    {
        // Translate region coordinates into buffer offsets
        int first = REGITR_START(itr) - win_beg;
        int last  = REGITR_END(itr) - win_beg;

        // Clip to the buffer
        if ( first < 0 ) first = 0;
        if ( last >= len ) last = len - 1;

        int k = first;
        while ( k <= last )
        {
            seq[k] = 'N';
            k++;
        }
    }
}
/*
 * Apply one VCF/BCF record to the sequence currently held in args->fa_buf.
 *
 * The record is ignored when it has no ALT allele, when it starts at or
 * before the last modified position (args->fa_frz_pos), when it overlaps a
 * region of args->mask, or when the selected genotype is missing or REF.
 * Otherwise the chosen allele replaces REF in the buffer, and fa_buf.l,
 * fa_mod_off and fa_frz_pos are updated; with args->chain set, length
 * changing events are also reported through push_chain_gap().
 *
 * The selected ALT string in rec->d.allele is modified in place (case
 * conversion, IUPAC code).
 *
 * Calls error() on malformed input: haplotype out of range, too few ALT
 * alleles, unsupported symbolic allele, or a REF/fasta mismatch.
 */
static void apply_variant(args_t *args, bcf1_t *rec)
{
    if ( rec->n_allele==1 ) return;

    if ( rec->pos <= args->fa_frz_pos )
    {
        fprintf(pysamerr,"The site %s:%d overlaps with another variant, skipping...\n", bcf_seqname(args->hdr,rec),rec->pos+1);
        return;
    }
    if ( args->mask )
    {
        char *chr = (char*)bcf_hdr_id2name(args->hdr,args->rid);
        int start = rec->pos;
        int end   = rec->pos + rec->rlen - 1;
        if ( regidx_overlap(args->mask, chr,start,end,NULL) ) return;
    }

    int i, ialt = 1;
    if ( args->isample >= 0 )
    {
        bcf_fmt_t *fmt = bcf_get_fmt(args->hdr, rec, "GT");
        if ( !fmt ) return;
        if ( args->haplotype )
        {
            // Use the allele of the requested (1-based) haplotype
            if ( args->haplotype > fmt->n ) error("Can't apply %d-th haplotype at %s:%d\n", args->haplotype,bcf_seqname(args->hdr,rec),rec->pos+1);
            uint8_t *ignore, *ptr = fmt->p + fmt->size*args->isample + args->haplotype - 1;
            ialt = bcf_dec_int1(ptr, fmt->type, &ignore);
            if ( bcf_gt_is_missing(ialt) || ialt==bcf_int32_vector_end ) return;
            ialt = bcf_gt_allele(ialt);
        }
        else if ( args->output_iupac )
        {
            // Look at the first two alleles of the genotype
            uint8_t *ignore, *ptr = fmt->p + fmt->size*args->isample;
            ialt = bcf_dec_int1(ptr, fmt->type, &ignore);
            if ( bcf_gt_is_missing(ialt) || ialt==bcf_int32_vector_end ) return;
            ialt = bcf_gt_allele(ialt);

            int jalt;
            if ( fmt->n>1 )
            {
                ptr = fmt->p + fmt->size*args->isample + 1;
                jalt = bcf_dec_int1(ptr, fmt->type, &ignore);
                if ( bcf_gt_is_missing(jalt) || jalt==bcf_int32_vector_end ) jalt = ialt;
                else jalt = bcf_gt_allele(jalt);
            }
            else jalt = ialt;
            if ( rec->n_allele <= ialt || rec->n_allele <= jalt ) error("Broken VCF, too few alts at %s:%d\n", bcf_seqname(args->hdr,rec),rec->pos+1);
            if ( ialt!=jalt && !rec->d.allele[ialt][1] && !rec->d.allele[jalt][1] ) // is this a het snp?
            {
                // NOTE(review): when the first allele is REF (ialt==0) the IUPAC
                // code is written into REF and the function then returns at the
                // "!ialt" check below - confirm this is the intended behaviour.
                char ial = rec->d.allele[ialt][0];
                char jal = rec->d.allele[jalt][0];
                rec->d.allele[ialt][0] = gt2iupac(ial,jal);
            }
        }
        else
        {
            // Take the first non-reference allele of the genotype
            for (i=0; i<fmt->n; i++)
            {
                uint8_t *ignore, *ptr = fmt->p + fmt->size*args->isample + i;
                ialt = bcf_dec_int1(ptr, fmt->type, &ignore);
                if ( bcf_gt_is_missing(ialt) || ialt==bcf_int32_vector_end ) return;
                ialt = bcf_gt_allele(ialt);
                if ( ialt ) break;
            }
        }
        if ( !ialt ) return;  // ref allele
        if ( rec->n_allele <= ialt ) error("Broken VCF, too few alts at %s:%d\n", bcf_seqname(args->hdr,rec),rec->pos+1);
    }
    else if ( args->output_iupac && !rec->d.allele[0][1] && !rec->d.allele[1][1] )
    {
        // No sample selected: encode REF and the first ALT as an IUPAC code
        char ial = rec->d.allele[0][0];
        char jal = rec->d.allele[1][0];
        rec->d.allele[1][0] = gt2iupac(ial,jal);
    }

    // Offset of the variant in the (already modified) buffer
    int idx = rec->pos - args->fa_ori_pos + args->fa_mod_off;
    if ( idx<0 || idx>=args->fa_buf.l )
        error("FIXME: %s:%d .. idx=%d, ori_pos=%d, len=%d, off=%d\n",bcf_seqname(args->hdr,rec),rec->pos+1,idx,args->fa_ori_pos,(int)args->fa_buf.l,args->fa_mod_off);

    // sanity check the reference base
    int len_diff = 0, alen = 0;
    if ( rec->d.allele[ialt][0]=='<' )
    {
        if ( strcasecmp(rec->d.allele[ialt], "<DEL>") )
            error("Symbolic alleles other than <DEL> are currently not supported: %s at %s:%d\n",rec->d.allele[ialt],bcf_seqname(args->hdr,rec),rec->pos+1);
        assert( rec->d.allele[0][1]==0 );           // todo: for now expecting strlen(REF) = 1
        len_diff = 1-rec->rlen;
        rec->d.allele[ialt] = rec->d.allele[0];     // according to VCF spec, REF must precede the event
        alen = strlen(rec->d.allele[ialt]);
    }
    else if ( strncasecmp(rec->d.allele[0],args->fa_buf.s+idx,rec->rlen) )
    {
        // Temporarily terminate the buffer after the REF span so that the
        // mismatching stretch can be printed on its own
        char tmp = 0;
        if ( args->fa_buf.l - idx > rec->rlen )
        {
            tmp = args->fa_buf.s[idx+rec->rlen];
            args->fa_buf.s[idx+rec->rlen] = 0;
        }
        error(
            "The fasta sequence does not match the REF allele at %s:%d:\n"
            " .vcf: [%s]\n"
            " .vcf: [%s] <- (ALT)\n"
            " .fa: [%s]%c%s\n",
            bcf_seqname(args->hdr,rec),rec->pos+1, rec->d.allele[0], rec->d.allele[ialt], args->fa_buf.s+idx,
            tmp?tmp:' ',tmp?args->fa_buf.s+idx+rec->rlen+1:""
            );
    }
    else
    {
        alen = strlen(rec->d.allele[ialt]);
        len_diff = alen - rec->rlen;
    }

    // <ctype.h> functions are only defined for unsigned char values and EOF,
    // hence the casts.
    if ( args->fa_case )
        for (i=0; i<alen; i++) rec->d.allele[ialt][i] = toupper((unsigned char)rec->d.allele[ialt][i]);
    else
        for (i=0; i<alen; i++) rec->d.allele[ialt][i] = tolower((unsigned char)rec->d.allele[ialt][i]);

    if ( len_diff <= 0 )
    {
        // deletion or same size event
        for (i=0; i<alen; i++)
            args->fa_buf.s[idx+i] = rec->d.allele[ialt][i];
        if ( len_diff )
            memmove(args->fa_buf.s+idx+alen,args->fa_buf.s+idx+rec->rlen,args->fa_buf.l-idx-rec->rlen);
    }
    else
    {
        // insertion
        ks_resize(&args->fa_buf, args->fa_buf.l + len_diff);
        memmove(args->fa_buf.s + idx + rec->rlen + len_diff, args->fa_buf.s + idx + rec->rlen, args->fa_buf.l - idx - rec->rlen);
        for (i=0; i<alen; i++)
            args->fa_buf.s[idx+i] = rec->d.allele[ialt][i];
    }
    if (args->chain && len_diff != 0)
    {
        // If first nucleotide of both REF and ALT are the same... (indels typically include the nucleotide before the variant)
        if ( strncasecmp(rec->d.allele[0],rec->d.allele[ialt],1) == 0)
        {
            // ...extend the block by 1 bp: start is 1 bp further and alleles are 1 bp shorter
            push_chain_gap(args->chain, rec->pos + 1, rec->rlen - 1, rec->pos + 1 + args->fa_mod_off, alen - 1);
        }
        else
        {
            // otherwise, just the coordinates of the variant as given
            push_chain_gap(args->chain, rec->pos, rec->rlen, rec->pos + args->fa_mod_off, alen);
        }
    }
    args->fa_buf.l += len_diff;
    args->fa_mod_off += len_diff;
    args->fa_frz_pos  = rec->pos + rec->rlen - 1;
}
/*
 * Read-fetching callback: pulls reads from the input described by `data`
 * (an mplp_aux_t) into `b` until one passes all filters.
 *
 * Returns the non-negative value of sam_itr_next()/sam_read1() for the
 * accepted read, or the negative value that ended the input.
 */
static int mplp_func(void *data, bam1_t *b)
{
    mplp_aux_t *aux = (mplp_aux_t*)data;
    char *ref;
    int ref_len, status;

    for (;;)
    {
        int has_ref;

        if ( aux->iter ) status = sam_itr_next(aux->fp, aux->iter, b);
        else status = sam_read1(aux->fp, aux->h, b);
        if (status < 0) break;

        // The 'B' cigar operation is not part of the specification, considering as obsolete.

        // exclude unmapped reads
        if (b->core.tid < 0 || (b->core.flag&BAM_FUNMAP)) continue;

        // required / filtered flag bits
        if (aux->conf->rflag_require && !(aux->conf->rflag_require&b->core.flag)) continue;
        if (aux->conf->rflag_filter && aux->conf->rflag_filter&b->core.flag) continue;

        if (aux->conf->bed)
        {
            // test overlap
            regitr_t *itr = aux->conf->bed_itr;
            int beg = b->core.pos, end = bam_endpos(b)-1;
            int overlap = regidx_overlap(aux->conf->bed, aux->h->target_name[b->core.tid],beg,end, itr);

            // NOTE(review): this refinement only runs when no overlap was
            // reported - confirm the exclusion logic (bed_logic==0) is as intended.
            if ( !aux->conf->bed_logic && !overlap )
            {
                // exclude only reads which are fully contained in the region
                while ( regitr_overlap(itr) )
                {
                    if ( beg < itr->beg || end > itr->end )
                    {
                        overlap = 1;
                        break;
                    }
                }
            }
            if ( !overlap ) continue;
        }

        if ( bam_smpl_get_sample_id(aux->conf->bsmpl,aux->bam_id,b)<0 ) continue;

        if (aux->conf->flag & MPLP_ILLUMINA13)
        {
            // shift base qualities down by 31, flooring at 0
            uint8_t *qual = bam_get_qual(b);
            int k;
            for (k = 0; k < b->core.l_qseq; ++k)
            {
                if (qual[k] > 31) qual[k] = qual[k] - 31;
                else qual[k] = 0;
            }
        }

        has_ref = 0;
        if (aux->conf->fai && b->core.tid >= 0)
        {
            has_ref = mplp_get_ref(aux, b->core.tid, &ref, &ref_len);
            if (has_ref && ref_len <= b->core.pos)
            {
                // exclude reads outside of the reference sequence
                fprintf(stderr,"[%s] Skipping because %d is outside of %d [ref:%d]\n",
                        __func__, b->core.pos, ref_len, b->core.tid);
                continue;
            }
        }

        if (has_ref && (aux->conf->flag&MPLP_REALN))
        {
            int realn_flag = (aux->conf->flag & MPLP_REDO_BAQ)? 7 : 3;
            sam_prob_realn(b, ref, ref_len, realn_flag);
        }

        if (has_ref && aux->conf->capQ_thres > 10)
        {
            int q = sam_cap_mapq(b, ref, ref_len, aux->conf->capQ_thres);
            if (q < 0) continue;    // skip
            if (b->core.qual > q) b->core.qual = q;
        }

        if (b->core.qual < aux->conf->min_mq) continue;

        // optionally drop paired reads that are not in a proper pair
        if ((aux->conf->flag&MPLP_NO_ORPHAN) && (b->core.flag&BAM_FPAIRED) && !(b->core.flag&BAM_FPROPER_PAIR)) continue;

        return status;
    }
    return status;
}
/*
 * Check the genotypes of every trio in args.trios at one site.
 *
 * Each genotype is turned into a 64-bit allele mask.  Where no rule from
 * args.rules overlaps the site, all three genotypes must be diploid and a
 * trio is consistent when one child allele is found in the mother and the
 * other in the father, with missing parental alleles treated as consistent.
 * Where rules overlap, they are tried in turn until one accepts the trio.
 *
 * Updates the per-trio nok/nbad counters.  With MODE_DELETE the genotypes of
 * inconsistent trios are set to missing in `rec`.
 *
 * Returns `rec` or NULL depending on args.mode: MODE_DELETE always returns
 * rec, MODE_LIST_GOOD returns rec when no trio is bad, MODE_LIST_BAD returns
 * rec when some trio is bad.  Sites that cannot be evaluated (more than 63
 * alleles, no GT, unexpected ploidy) return rec only with MODE_LIST_GOOD.
 */
bcf1_t *process(bcf1_t *rec)
{
    bcf1_t *dflt = args.mode&MODE_LIST_GOOD ? rec : NULL;
    args.nrec++;

    if ( rec->n_allele > 63 ) return dflt;      // we use 64bit bitmask below

    int ngt = bcf_get_genotypes(args.hdr, rec, &args.gt_arr, &args.ngt_arr);
    if ( ngt<0 ) return dflt;
    if ( ngt!=2*bcf_hdr_nsamples(args.hdr) && ngt!=bcf_hdr_nsamples(args.hdr) ) return dflt;
    ngt /= bcf_hdr_nsamples(args.hdr);

    int itr_set = regidx_overlap(args.rules, bcf_seqname(args.hdr,rec),rec->pos,rec->pos, args.itr_ori);

    // The shifts below must be done in 64 bits: an int "1<<n" is undefined
    // for n >= 31, while up to 63 alleles are allowed here.
    const uint64_t one = 1;

    int i, has_bad = 0, needs_update = 0;
    for (i=0; i<args.ntrios; i++)
    {
        int32_t a,b,c,d,e,f;
        trio_t *trio = &args.trios[i];
        a = args.gt_arr[ngt*trio->imother];
        b = ngt==2 ? args.gt_arr[ngt*trio->imother+1] : bcf_int32_vector_end;
        c = args.gt_arr[ngt*trio->ifather];
        d = ngt==2 ? args.gt_arr[ngt*trio->ifather+1] : bcf_int32_vector_end;
        e = args.gt_arr[ngt*trio->ichild];
        f = ngt==2 ? args.gt_arr[ngt*trio->ichild+1] : bcf_int32_vector_end;

        // skip sites with missing data in child
        if ( bcf_gt_is_missing(e) || bcf_gt_is_missing(f) ) continue;

        uint64_t mother = 0, father = 0,child1,child2;

        int is_ok = 0;
        if ( !itr_set )
        {
            if ( f==bcf_int32_vector_end )
            {
                warn_ploidy(rec);
                continue;
            }

            // All M,F,C genotypes are diploid. Missing data are considered consistent.
            child1  = one<<bcf_gt_allele(e);
            child2  = one<<bcf_gt_allele(f);
            mother  = bcf_gt_is_missing(a) ? child1|child2 : (one<<bcf_gt_allele(a));
            mother |= bcf_gt_is_missing(b) || b==bcf_int32_vector_end ? child1|child2 : (one<<bcf_gt_allele(b));
            father  = bcf_gt_is_missing(c) ? child1|child2 : (one<<bcf_gt_allele(c));
            father |= bcf_gt_is_missing(d) || d==bcf_int32_vector_end ? child1|child2 : (one<<bcf_gt_allele(d));

            if ( (mother&child1 && father&child2) || (mother&child2 && father&child1) ) is_ok = 1;
        }
        else
        {
            // Here missing or absent alleles contribute no bits
            child1  = one<<bcf_gt_allele(e);
            child2  = bcf_gt_is_missing(f) || f==bcf_int32_vector_end ? 0 : (one<<bcf_gt_allele(f));
            mother |= bcf_gt_is_missing(a) ? 0 : (one<<bcf_gt_allele(a));
            mother |= bcf_gt_is_missing(b) || b==bcf_int32_vector_end ? 0 : (one<<bcf_gt_allele(b));
            father |= bcf_gt_is_missing(c) ? 0 : (one<<bcf_gt_allele(c));
            father |= bcf_gt_is_missing(d) || d==bcf_int32_vector_end ? 0 : (one<<bcf_gt_allele(d));

            // Try the overlapping rules one by one until one accepts the trio
            regitr_copy(args.itr, args.itr_ori);
            while ( !is_ok && regitr_overlap(args.itr) )
            {
                rule_t *rule = &regitr_payload(args.itr,rule_t);
                if ( child1 && child2 )
                {
                    if ( !rule->mal || !rule->fal ) continue; // wrong rule (haploid), but this is a diploid GT
                    if ( !mother ) mother = child1|child2;
                    if ( !father ) father = child1|child2;
                    if ( (mother&child1 && father&child2) || (mother&child2 && father&child1) ) is_ok = 1;
                    continue;
                }
                // Child has a single allele: check it against whichever
                // parents the rule names, skipping parents with no data
                if ( rule->mal )
                {
                    if ( mother && !(child1&mother) ) continue;
                }
                if ( rule->fal )
                {
                    if ( father && !(child1&father) ) continue;
                }
                is_ok = 1;
            }
        }
        if ( is_ok )
        {
            trio->nok++;
        }
        else
        {
            trio->nbad++;
            has_bad = 1;
            if ( args.mode&MODE_DELETE )
            {
                args.gt_arr[ngt*trio->imother] = bcf_gt_missing;
                if ( b!=bcf_int32_vector_end ) args.gt_arr[ngt*trio->imother+1] = bcf_gt_missing; // should be always true
                args.gt_arr[ngt*trio->ifather] = bcf_gt_missing;
                if ( d!=bcf_int32_vector_end ) args.gt_arr[ngt*trio->ifather+1] = bcf_gt_missing;
                args.gt_arr[ngt*trio->ichild] = bcf_gt_missing;
                if ( f!=bcf_int32_vector_end ) args.gt_arr[ngt*trio->ichild+1] = bcf_gt_missing;
                needs_update = 1;
            }
        }
    }

    if ( needs_update && bcf_update_genotypes(args.hdr,rec,args.gt_arr,ngt*bcf_hdr_nsamples(args.hdr)) )
        error("Could not update GT field at %s:%d\n", bcf_seqname(args.hdr,rec),rec->pos+1);

    if ( args.mode&MODE_DELETE ) return rec;
    if ( args.mode&MODE_LIST_GOOD ) return has_bad ? NULL : rec;
    if ( args.mode&MODE_LIST_BAD ) return has_bad ? rec : NULL;

    return NULL;
}
/*
 * Print to stdout the records of the indexed file `fname` that overlap the
 * given regions, optionally restricted to the targets in
 * args->targets_fname.
 *
 *   args   uses targets_fname, print_header and header_only
 *   fname  input file; BCF is read through its index, VCF/SAM/unknown
 *          formats through a tabix index; BAM is rejected
 *   regs   array of nregs region strings; the strings and the array itself
 *          are freed before returning
 *
 * Returns 0.  All failures, including failed writes of BCF output, are
 * reported through error().
 */
static int query_regions(args_t *args, char *fname, char **regs, int nregs)
{
    int i;

    htsFile *fp = hts_open(fname,"r");
    if ( !fp ) error("Could not read %s\n", fname);
    enum htsExactFormat format = hts_get_format(fp)->format;

    regidx_t *reg_idx = NULL;
    if ( args->targets_fname )
    {
        reg_idx = regidx_init(args->targets_fname, NULL, NULL, 0, NULL);
        if ( !reg_idx ) error("Could not read %s\n", args->targets_fname);
    }

    if ( format == bcf )
    {
        htsFile *out = hts_open("-","w");
        if ( !out ) error("Could not open stdout\n");
        hts_idx_t *idx = bcf_index_load(fname);
        if ( !idx ) error("Could not load .csi index of %s\n", fname);
        bcf_hdr_t *hdr = bcf_hdr_read(fp);
        if ( !hdr ) error("Could not read the header: %s\n", fname);
        if ( args->print_header && bcf_hdr_write(out,hdr) < 0 )
            error("Could not write the header to stdout\n");
        if ( !args->header_only )
        {
            bcf1_t *rec = bcf_init();
            for (i=0; i<nregs; i++)
            {
                hts_itr_t *itr = bcf_itr_querys(idx,hdr,regs[i]);
                if ( !itr ) continue;   // same handling as in the tabix branch below
                while ( bcf_itr_next(fp, itr, rec) >=0 )
                {
                    if ( reg_idx && !regidx_overlap(reg_idx, bcf_seqname(hdr,rec),rec->pos,rec->pos+rec->rlen-1, NULL) ) continue;
                    if ( bcf_write(out,hdr,rec) < 0 ) error("Could not write to stdout\n");
                }
                bcf_itr_destroy(itr);
            }
            bcf_destroy(rec);
        }
        if ( hts_close(out) ) error("hts_close returned non-zero status for stdout\n");
        bcf_hdr_destroy(hdr);
        hts_idx_destroy(idx);
    }
    else if ( format==vcf || format==sam || format==unknown_format )
    {
        tbx_t *tbx = tbx_index_load(fname);
        if ( !tbx ) error("Could not load .tbi/.csi index of %s\n", fname);
        kstring_t str = {0,0,0};
        if ( args->print_header )
        {
            // Header lines are those starting with the index's meta character
            while ( hts_getline(fp, KS_SEP_LINE, &str) >= 0 )
            {
                if ( !str.l || str.s[0]!=tbx->conf.meta_char ) break;
                puts(str.s);
            }
        }
        if ( !args->header_only )
        {
            int nseq;
            const char **seq = NULL;
            if ( reg_idx ) seq = tbx_seqnames(tbx, &nseq);  // needed only for target filtering
            for (i=0; i<nregs; i++)
            {
                hts_itr_t *itr = tbx_itr_querys(tbx, regs[i]);
                if ( !itr ) continue;
                while (tbx_itr_next(fp, tbx, itr, &str) >= 0)
                {
                    if ( reg_idx && !regidx_overlap(reg_idx,seq[itr->curr_tid],itr->curr_beg,itr->curr_end, NULL) ) continue;
                    puts(str.s);
                }
                tbx_itr_destroy(itr);
            }
            free(seq);
        }
        free(str.s);
        tbx_destroy(tbx);
    }
    else if ( format==bam )
        error("Please use \"samtools view\" for querying BAM files.\n");

    if ( reg_idx ) regidx_destroy(reg_idx);
    if ( hts_close(fp) ) error("hts_close returned non-zero status: %s\n", fname);

    for (i=0; i<nregs; i++) free(regs[i]);
    free(regs);
    return 0;
}
int run(int argc, char **argv) { args_t *args = (args_t*) calloc(1,sizeof(args_t)); args->nsites = 10; args->min_hets = 0.3; args->background = "X:60001-2699520"; static struct option loptions[] = { {"verbose",1,0,'v'}, {"ploidy",1,0,'p'}, {"nsites",1,0,'n'}, {"guess",1,0,'g'}, {"min-hets",1,0,'m'}, {"background",1,0,'b'}, {0,0,0,0} }; char c, *tmp, *ploidy_fname = NULL; while ((c = getopt_long(argc, argv, "p:n:g:m:vb:",loptions,NULL)) >= 0) { switch (c) { case 'b': if ( !strcmp("-",optarg) ) args->background = NULL; else args->background = optarg; break; case 'v': args->verbose = 1; break; case 'g': if ( !strcasecmp(optarg,"GT") ) args->guess = GUESS_GT; else if ( !strcasecmp(optarg,"PL") ) args->guess = GUESS_PL; else if ( !strcasecmp(optarg,"GL") ) args->guess = GUESS_GL; else error("The argument not recognised, expected --guess GT, --guess PL or --guess GL: %s\n", optarg); break; case 'm': args->min_hets = strtod(optarg,&tmp); if ( *tmp ) error("Unexpected argument to --min-hets: %s\n", optarg); break; case 'p': ploidy_fname = optarg; break; case 'n': args->nsites = strtol(optarg,&tmp,10); if (*tmp) error("Unexpected argument to --nsites: %s\n", optarg); break; case 'h': case '?': default: error("%s", usage()); break; } } args->sr = bcf_sr_init(); args->sr->require_index = 1; if ( !argv[0] ) error("%s", usage()); if ( !bcf_sr_add_reader(args->sr,argv[0]) ) error("Error: %s\n", bcf_sr_strerror(args->sr->errnum)); args->hdr = args->sr->readers[0].header; args->nsample = bcf_hdr_nsamples(args->hdr); args->dflt_ploidy = 2; if ( ploidy_fname ) { args->ploidy = ploidy_init(ploidy_fname, args->dflt_ploidy); if ( !args->ploidy ) error("Could not read %s\n", ploidy_fname); } else { args->ploidy = ploidy_init_string( "X 1 60000 M 1\n" "X 2699521 154931043 M 1\n" "Y 1 59373566 M 1\n" "Y 1 59373566 F 0\n", args->dflt_ploidy); } args->nsex = ploidy_nsex(args->ploidy); args->sex2ploidy = (int*) malloc(sizeof(int)*args->nsex); args->max_ploidy = ploidy_max(args->ploidy); if ( 
args->guess && args->max_ploidy > 2 ) error("Sorry, ploidy %d not supported with -g\n", args->max_ploidy); args->ncounts = args->nsample * ((args->max_ploidy>2 ? args->max_ploidy : 2)+1); args->counts = (int*) malloc(sizeof(int)*args->ncounts); args->bg_counts = (count_t*) calloc(args->nsample,sizeof(count_t)); args->sex2prob = (float*) calloc(args->nsample*args->nsex,sizeof(float)); int i, nseq; for (i=0; i<args->nsample*args->nsex; i++) args->sex2prob[i] = 1; if ( args->verbose && args->guess ) printf("# [1]REG\t[2]Region\t[3]Sample\t[4]Het fraction\t[5]nHet\t[6]nHom\t[7]nMissing\n"); // First get the counts from expected haploid regions regidx_t *idx = ploidy_regions(args->ploidy); char **seqs = regidx_seq_names(idx, &nseq); for (i=0; i<nseq; i++) { regitr_t itr; regidx_overlap(idx, seqs[i], 0, UINT32_MAX, &itr); while ( itr.i < itr.n ) { if ( args->guess ) itr.i += process_region_guess(args, seqs[i], &itr); else itr.i += process_region_precise(args, seqs[i], &itr); } } // Get the counts from a PAR (the background diploid region) and see if the fraction // of hets is different if ( args->guess ) sex2prob_guess(args); for (i=0; i<args->nsample; i++) { int j, jmax = 0; float max = 0, sum = 0; for (j=0; j<args->nsex; j++) { sum += args->sex2prob[i*args->nsex+j]; if ( max < args->sex2prob[i*args->nsex+j] ) { jmax = j; max = args->sex2prob[i*args->nsex+j]; } } if ( args->verbose ) printf("%s\t%s\t%f\n", args->hdr->samples[i],ploidy_id2sex(args->ploidy,jmax),args->sex2prob[i*args->nsex+jmax]/sum); else printf("%s\t%s\n", args->hdr->samples[i],ploidy_id2sex(args->ploidy,jmax)); } bcf_sr_destroy(args->sr); ploidy_destroy(args->ploidy); destroy_regs(args); free(args->sex2ploidy); free(args->counts); free(args->bg_counts); free(args->gts); free(args->pls); free(args->sex2prob); free(args); return 0; }