/*
 *  Release the resources held in args: the synced reader, every buffered
 *  VCF record together with the buffer array, the fasta buffer, the optional
 *  mask index, and finally the chain (only when a chain file name was given)
 *  and output streams. A failing fclose() is reported through error().
 */
static void destroy_data(args_t *args)
{
    int slot;

    bcf_sr_destroy(args->files);

    /* Unused slots of the record buffer are NULL and are skipped. */
    for (slot = 0; slot < args->vcf_rbuf.m; slot++)
    {
        if ( !args->vcf_buf[slot] ) continue;
        bcf_destroy1(args->vcf_buf[slot]);
    }
    free(args->vcf_buf);
    free(args->fa_buf.s);

    if ( args->mask )
        regidx_destroy(args->mask);

    /* The chain stream is closed only when a chain file was requested. */
    if ( args->chain_fname && fclose(args->fp_chain) )
        error("Close failed: %s\n", args->chain_fname);

    if ( fclose(args->fp_out) )
        error("Close failed: %s\n", args->output_fname);
}
int run(int argc, char **argv) { char *trio_samples = NULL, *trio_file = NULL, *rules_fname = NULL, *rules_string = NULL; memset(&args,0,sizeof(args_t)); args.mode = 0; args.output_fname = "-"; static struct option loptions[] = { {"trio",1,0,'t'}, {"trio-file",1,0,'T'}, {"delete",0,0,'d'}, {"list",1,0,'l'}, {"count",0,0,'c'}, {"rules",1,0,'r'}, {"rules-file",1,0,'R'}, {"output",required_argument,NULL,'o'}, {"output-type",required_argument,NULL,'O'}, {0,0,0,0} }; int c; while ((c = getopt_long(argc, argv, "?ht:T:l:cdr:R:o:O:",loptions,NULL)) >= 0) { switch (c) { case 'o': args.output_fname = optarg; break; case 'O': switch (optarg[0]) { case 'b': args.output_type = FT_BCF_GZ; break; case 'u': args.output_type = FT_BCF; break; case 'z': args.output_type = FT_VCF_GZ; break; case 'v': args.output_type = FT_VCF; break; default: error("The output type \"%s\" not recognised\n", optarg); }; break; case 'R': rules_fname = optarg; break; case 'r': rules_string = optarg; break; case 'd': args.mode |= MODE_DELETE; break; case 'c': args.mode |= MODE_COUNT; break; case 'l': if ( !strcmp("+",optarg) ) args.mode |= MODE_LIST_GOOD; else if ( !strcmp("x",optarg) ) args.mode |= MODE_LIST_BAD; else error("The argument not recognised: --list %s\n", optarg); break; case 't': trio_samples = optarg; break; case 'T': trio_file = optarg; break; case 'h': case '?': default: error("%s",usage()); break; } } if ( rules_fname ) args.rules = regidx_init(rules_fname, parse_rules, NULL, sizeof(rule_t), &args); else args.rules = init_rules(&args, rules_string); if ( !args.rules ) return -1; args.itr = regitr_init(args.rules); args.itr_ori = regitr_init(args.rules); char *fname = NULL; if ( optind>=argc || argv[optind][0]=='-' ) { if ( !isatty(fileno((FILE *)stdin)) ) fname = "-"; // reading from stdin else error("%s",usage()); } else fname = argv[optind]; if ( !trio_samples && !trio_file ) error("Expected the -t/T option\n"); if ( !args.mode ) error("Expected one of the -c, -d or -l options\n"); if 
( args.mode&MODE_DELETE && !(args.mode&(MODE_LIST_GOOD|MODE_LIST_BAD)) ) args.mode |= MODE_LIST_GOOD|MODE_LIST_BAD; args.sr = bcf_sr_init(); if ( !bcf_sr_add_reader(args.sr, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args.sr->errnum)); args.hdr = bcf_sr_get_header(args.sr, 0); args.out_fh = hts_open(args.output_fname,hts_bcf_wmode(args.output_type)); if ( args.out_fh == NULL ) error("Can't write to \"%s\": %s\n", args.output_fname, strerror(errno)); bcf_hdr_write(args.out_fh, args.hdr); int i, n = 0; char **list; if ( trio_samples ) { args.ntrios = 1; args.trios = (trio_t*) calloc(1,sizeof(trio_t)); list = hts_readlist(trio_samples, 0, &n); if ( n!=3 ) error("Expected three sample names with -t\n"); args.trios[0].imother = bcf_hdr_id2int(args.hdr, BCF_DT_SAMPLE, list[0]); args.trios[0].ifather = bcf_hdr_id2int(args.hdr, BCF_DT_SAMPLE, list[1]); args.trios[0].ichild = bcf_hdr_id2int(args.hdr, BCF_DT_SAMPLE, list[2]); for (i=0; i<n; i++) free(list[i]); free(list); } if ( trio_file ) { list = hts_readlist(trio_file, 1, &n); args.ntrios = n; args.trios = (trio_t*) calloc(n,sizeof(trio_t)); for (i=0; i<n; i++) { char *ss = list[i], *se; se = strchr(ss, ','); if ( !se ) error("Could not parse %s: %s\n",trio_file, ss); *se = 0; args.trios[i].imother = bcf_hdr_id2int(args.hdr, BCF_DT_SAMPLE, ss); if ( args.trios[i].imother<0 ) error("No such sample: \"%s\"\n", ss); ss = ++se; se = strchr(ss, ','); if ( !se ) error("Could not parse %s\n",trio_file); *se = 0; args.trios[i].ifather = bcf_hdr_id2int(args.hdr, BCF_DT_SAMPLE, ss); if ( args.trios[i].ifather<0 ) error("No such sample: \"%s\"\n", ss); ss = ++se; if ( *ss=='\0' ) error("Could not parse %s\n",trio_file); args.trios[i].ichild = bcf_hdr_id2int(args.hdr, BCF_DT_SAMPLE, ss); if ( args.trios[i].ichild<0 ) error("No such sample: \"%s\"\n", ss); free(list[i]); } free(list); } while ( bcf_sr_next_line(args.sr) ) { bcf1_t *line = bcf_sr_get_line(args.sr,0); line = process(line); if ( line ) { if ( 
line->errcode ) error("TODO: Unchecked error (%d), exiting\n",line->errcode); bcf_write1(args.out_fh, args.hdr, line); } } fprintf(stderr,"# [1]nOK\t[2]nBad\t[3]nSkipped\t[4]Trio\n"); for (i=0; i<args.ntrios; i++) { trio_t *trio = &args.trios[i]; fprintf(stderr,"%d\t%d\t%d\t%s,%s,%s\n", trio->nok,trio->nbad,args.nrec-(trio->nok+trio->nbad), bcf_hdr_int2id(args.hdr, BCF_DT_SAMPLE, trio->imother), bcf_hdr_int2id(args.hdr, BCF_DT_SAMPLE, trio->ifather), bcf_hdr_int2id(args.hdr, BCF_DT_SAMPLE, trio->ichild) ); } free(args.gt_arr); free(args.trios); regitr_destroy(args.itr); regitr_destroy(args.itr_ori); regidx_destroy(args.rules); bcf_sr_destroy(args.sr); if ( hts_close(args.out_fh)!=0 ) error("Error: close failed\n"); return 0; }
int bam_mpileup(int argc, char *argv[]) { int c; const char *file_list = NULL; char **fn = NULL; int nfiles = 0, use_orphan = 0, noref = 0; mplp_conf_t mplp; memset(&mplp, 0, sizeof(mplp_conf_t)); mplp.min_baseQ = 13; mplp.capQ_thres = 0; mplp.max_depth = 250; mplp.max_indel_depth = 250; mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100; mplp.min_frac = 0.002; mplp.min_support = 1; mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_SMART_OVERLAPS; mplp.argc = argc; mplp.argv = argv; mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP; mplp.output_fname = NULL; mplp.output_type = FT_VCF; mplp.record_cmd_line = 1; mplp.n_threads = 0; mplp.bsmpl = bam_smpl_init(); static const struct option lopts[] = { {"rf", required_argument, NULL, 1}, // require flag {"ff", required_argument, NULL, 2}, // filter flag {"incl-flags", required_argument, NULL, 1}, {"excl-flags", required_argument, NULL, 2}, {"output", required_argument, NULL, 3}, {"open-prob", required_argument, NULL, 4}, {"ignore-RG", no_argument, NULL, 5}, {"ignore-rg", no_argument, NULL, 5}, {"gvcf", required_argument, NULL, 'g'}, {"no-reference", no_argument, NULL, 7}, {"no-version", no_argument, NULL, 8}, {"threads",required_argument,NULL,9}, {"illumina1.3+", no_argument, NULL, '6'}, {"count-orphans", no_argument, NULL, 'A'}, {"bam-list", required_argument, NULL, 'b'}, {"no-BAQ", no_argument, NULL, 'B'}, {"no-baq", no_argument, NULL, 'B'}, {"adjust-MQ", required_argument, NULL, 'C'}, {"adjust-mq", required_argument, NULL, 'C'}, {"max-depth", required_argument, NULL, 'd'}, {"redo-BAQ", no_argument, NULL, 'E'}, {"redo-baq", no_argument, NULL, 'E'}, {"fasta-ref", required_argument, NULL, 'f'}, {"read-groups", required_argument, NULL, 'G'}, {"region", required_argument, NULL, 'r'}, {"regions", required_argument, NULL, 'r'}, {"regions-file", required_argument, NULL, 'R'}, {"targets", required_argument, NULL, 't'}, {"targets-file", required_argument, NULL, 'T'}, {"min-MQ", required_argument, NULL, 
'q'}, {"min-mq", required_argument, NULL, 'q'}, {"min-BQ", required_argument, NULL, 'Q'}, {"min-bq", required_argument, NULL, 'Q'}, {"ignore-overlaps", no_argument, NULL, 'x'}, {"output-type", required_argument, NULL, 'O'}, {"samples", required_argument, NULL, 's'}, {"samples-file", required_argument, NULL, 'S'}, {"annotate", required_argument, NULL, 'a'}, {"ext-prob", required_argument, NULL, 'e'}, {"gap-frac", required_argument, NULL, 'F'}, {"tandem-qual", required_argument, NULL, 'h'}, {"skip-indels", no_argument, NULL, 'I'}, {"max-idepth", required_argument, NULL, 'L'}, {"min-ireads ", required_argument, NULL, 'm'}, {"per-sample-mF", no_argument, NULL, 'p'}, {"per-sample-mf", no_argument, NULL, 'p'}, {"platforms", required_argument, NULL, 'P'}, {NULL, 0, NULL, 0} }; while ((c = getopt_long(argc, argv, "Ag:f:r:R:q:Q:C:Bd:L:b:P:po:e:h:Im:F:EG:6O:xa:s:S:t:T:",lopts,NULL)) >= 0) { switch (c) { case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break; case 1 : mplp.rflag_require = bam_str2flag(optarg); if ( mplp.rflag_require<0 ) { fprintf(stderr,"Could not parse --rf %s\n", optarg); return 1; } break; case 2 : mplp.rflag_filter = bam_str2flag(optarg); if ( mplp.rflag_filter<0 ) { fprintf(stderr,"Could not parse --ff %s\n", optarg); return 1; } break; case 3 : mplp.output_fname = optarg; break; case 4 : mplp.openQ = atoi(optarg); break; case 5 : bam_smpl_ignore_readgroups(mplp.bsmpl); break; case 'g': mplp.gvcf = gvcf_init(optarg); if ( !mplp.gvcf ) error("Could not parse: --gvcf %s\n", optarg); break; case 'f': mplp.fai = fai_load(optarg); if (mplp.fai == NULL) return 1; mplp.fai_fname = optarg; break; case 7 : noref = 1; break; case 8 : mplp.record_cmd_line = 0; break; case 9 : mplp.n_threads = strtol(optarg, 0, 0); break; case 'd': mplp.max_depth = atoi(optarg); break; case 'r': mplp.reg_fname = strdup(optarg); break; case 'R': mplp.reg_fname = strdup(optarg); mplp.reg_is_file = 1; break; case 't': // In the original version the whole BAM was streamed which is 
inefficient // with few BED intervals and big BAMs. Todo: devise a heuristic to determine // best strategy, that is streaming or jumping. if ( optarg[0]=='^' ) optarg++; else mplp.bed_logic = 1; mplp.bed = regidx_init(NULL,regidx_parse_reg,NULL,0,NULL); mplp.bed_itr = regitr_init(mplp.bed); if ( regidx_insert_list(mplp.bed,optarg,',') !=0 ) { fprintf(stderr,"Could not parse the targets: %s\n", optarg); exit(EXIT_FAILURE); } break; case 'T': if ( optarg[0]=='^' ) optarg++; else mplp.bed_logic = 1; mplp.bed = regidx_init(optarg,NULL,NULL,0,NULL); if (!mplp.bed) { fprintf(stderr, "bcftools mpileup: Could not read file \"%s\"", optarg); return 1; } break; case 'P': mplp.pl_list = strdup(optarg); break; case 'p': mplp.flag |= MPLP_PER_SAMPLE; break; case 'B': mplp.flag &= ~MPLP_REALN; break; case 'I': mplp.flag |= MPLP_NO_INDEL; break; case 'E': mplp.flag |= MPLP_REDO_BAQ; break; case '6': mplp.flag |= MPLP_ILLUMINA13; break; case 's': if ( bam_smpl_add_samples(mplp.bsmpl,optarg,0)<0 ) error("Could not read samples: %s\n",optarg); break; case 'S': if ( bam_smpl_add_samples(mplp.bsmpl,optarg,1)<0 ) error("Could not read samples: %s\n",optarg); break; case 'O': switch (optarg[0]) { case 'b': mplp.output_type = FT_BCF_GZ; break; case 'u': mplp.output_type = FT_BCF; break; case 'z': mplp.output_type = FT_VCF_GZ; break; case 'v': mplp.output_type = FT_VCF; break; default: error("[error] The option \"-O\" changed meaning when mpileup moved to bcftools. Did you mean: \"bcftools mpileup --output-type\" or \"samtools mpileup --output-BP\"?\n"); } break; case 'C': mplp.capQ_thres = atoi(optarg); break; case 'q': mplp.min_mq = atoi(optarg); break; case 'Q': mplp.min_baseQ = atoi(optarg); break; case 'b': file_list = optarg; break; case 'o': { char *end; long value = strtol(optarg, &end, 10); // Distinguish between -o INT and -o FILE (a bit of a hack!) 
if (*end == '\0') mplp.openQ = value; else mplp.output_fname = optarg; } break; case 'e': mplp.extQ = atoi(optarg); break; case 'h': mplp.tandemQ = atoi(optarg); break; case 'A': use_orphan = 1; break; case 'F': mplp.min_frac = atof(optarg); break; case 'm': mplp.min_support = atoi(optarg); break; case 'L': mplp.max_indel_depth = atoi(optarg); break; case 'G': bam_smpl_add_readgroups(mplp.bsmpl, optarg, 1); break; case 'a': if (optarg[0]=='?') { list_annotations(stderr); return 1; } mplp.fmt_flag |= parse_format_flag(optarg); break; default: fprintf(stderr,"Invalid option: '%c'\n", c); return 1; } } if ( mplp.gvcf && !(mplp.fmt_flag&B2B_FMT_DP) ) { fprintf(stderr,"[warning] The -t DP option is required with --gvcf, switching on.\n"); mplp.fmt_flag |= B2B_FMT_DP; } if ( mplp.flag&(MPLP_BCF|MPLP_VCF|MPLP_NO_COMP) ) { if ( mplp.flag&MPLP_VCF ) { if ( mplp.flag&MPLP_NO_COMP ) mplp.output_type = FT_VCF; else mplp.output_type = FT_VCF_GZ; } else if ( mplp.flag&MPLP_BCF ) { if ( mplp.flag&MPLP_NO_COMP ) mplp.output_type = FT_BCF; else mplp.output_type = FT_BCF_GZ; } } if ( !(mplp.flag&MPLP_REALN) && mplp.flag&MPLP_REDO_BAQ ) { fprintf(stderr,"Error: The -B option cannot be combined with -E\n"); return 1; } if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN; if (argc == 1) { print_usage(stderr, &mplp); return 1; } if (!mplp.fai && !noref) { fprintf(stderr,"Error: mpileup requires the --fasta-ref option by default; use --no-reference to run without a fasta reference\n"); return 1; } int ret,i; if (file_list) { if ( read_file_list(file_list,&nfiles,&fn) ) return 1; mplp.files = fn; mplp.nfiles = nfiles; } else { mplp.nfiles = argc - optind; mplp.files = (char**) malloc(mplp.nfiles*sizeof(char*)); for (i=0; i<mplp.nfiles; i++) mplp.files[i] = strdup(argv[optind+i]); } ret = mpileup(&mplp); for (i=0; i<mplp.nfiles; i++) free(mplp.files[i]); free(mplp.files); free(mplp.reg_fname); free(mplp.pl_list); if (mplp.fai) fai_destroy(mplp.fai); if (mplp.bed) regidx_destroy(mplp.bed); if 
(mplp.bed_itr) regitr_destroy(mplp.bed_itr); if (mplp.reg) regidx_destroy(mplp.reg); bam_smpl_destroy(mplp.bsmpl); return ret; }
/*
 *  Print to stdout the records of an indexed file which overlap the given
 *  regions.
 *
 *  args   .. options read here: targets_fname (optional file of regions a
 *            record must also overlap), print_header, header_only
 *  fname  .. the file to query; its index is loaded from the same name
 *  regs   .. nregs region strings; the strings and the array are freed here
 *
 *  BCF input goes through the BCF index and is written with bcf_write();
 *  VCF, SAM and unrecognised formats go through the tabix index and are
 *  printed line by line. BAM input is rejected.
 *
 *  Returns 0. Failures, including failed writes, are reported through
 *  error(), which this code assumes does not return.
 */
static int query_regions(args_t *args, char *fname, char **regs, int nregs)
{
    int i;
    htsFile *fp = hts_open(fname,"r");
    if ( !fp ) error("Could not read %s\n", fname);
    enum htsExactFormat format = hts_get_format(fp)->format;

    regidx_t *reg_idx = NULL;
    if ( args->targets_fname )
    {
        reg_idx = regidx_init(args->targets_fname, NULL, NULL, 0, NULL);
        if ( !reg_idx ) error("Could not read %s\n", args->targets_fname);
    }

    if ( format == bcf )
    {
        htsFile *out = hts_open("-","w");
        if ( !out ) error("Could not open stdout\n");
        hts_idx_t *idx = bcf_index_load(fname);
        if ( !idx ) error("Could not load .csi index of %s\n", fname);
        bcf_hdr_t *hdr = bcf_hdr_read(fp);
        if ( !hdr ) error("Could not read the header: %s\n", fname);
        if ( args->print_header )
        {
            if ( bcf_hdr_write(out,hdr)!=0 ) error("Failed to write the header to stdout\n");
        }
        if ( !args->header_only )
        {
            bcf1_t *rec = bcf_init();
            for (i=0; i<nregs; i++)
            {
                hts_itr_t *itr = bcf_itr_querys(idx,hdr,regs[i]);
                if ( !itr ) continue;   // same handling as in the tabix branch below
                while ( bcf_itr_next(fp, itr, rec) >=0 )
                {
                    // With a targets file, print only the records which overlap it
                    if ( reg_idx && !regidx_overlap(reg_idx, bcf_seqname(hdr,rec),rec->pos,rec->pos+rec->rlen-1, NULL) ) continue;
                    if ( bcf_write(out,hdr,rec)!=0 ) error("Failed to write to stdout\n");
                }
                tbx_itr_destroy(itr);
            }
            bcf_destroy(rec);
        }
        if ( hts_close(out) ) error("hts_close returned non-zero status for stdout\n");
        bcf_hdr_destroy(hdr);
        hts_idx_destroy(idx);
    }
    else if ( format==vcf || format==sam || format==unknown_format )
    {
        tbx_t *tbx = tbx_index_load(fname);
        if ( !tbx ) error("Could not load .tbi/.csi index of %s\n", fname);
        kstring_t str = {0,0,0};
        if ( args->print_header )
        {
            // The header is the leading run of lines which start with the
            // index's meta character
            while ( hts_getline(fp, KS_SEP_LINE, &str) >= 0 )
            {
                if ( !str.l || str.s[0]!=tbx->conf.meta_char ) break;
                if ( puts(str.s) < 0 ) error("Failed to write to stdout\n");
            }
        }
        if ( !args->header_only )
        {
            int nseq;
            const char **seq = NULL;
            // Sequence names are needed only for the targets overlap test
            if ( reg_idx ) seq = tbx_seqnames(tbx, &nseq);
            for (i=0; i<nregs; i++)
            {
                hts_itr_t *itr = tbx_itr_querys(tbx, regs[i]);
                if ( !itr ) continue;
                while (tbx_itr_next(fp, tbx, itr, &str) >= 0)
                {
                    if ( reg_idx && !regidx_overlap(reg_idx,seq[itr->curr_tid],itr->curr_beg,itr->curr_end, NULL) ) continue;
                    if ( puts(str.s) < 0 ) error("Failed to write to stdout\n");
                }
                tbx_itr_destroy(itr);
            }
            free(seq);
        }
        free(str.s);
        tbx_destroy(tbx);
    }
    else if ( format==bam )
        error("Please use \"samtools view\" for querying BAM files.\n");

    if ( reg_idx ) regidx_destroy(reg_idx);
    if ( hts_close(fp) ) error("hts_close returned non-zero status: %s\n", fname);

    for (i=0; i<nregs; i++) free(regs[i]);
    free(regs);
    return 0;
}