int bam_mpileup(int argc, char *argv[]) { int c; const char *file_list = NULL; char **fn = NULL; int nfiles = 0, use_orphan = 0; mplp_conf_t mplp; memset(&mplp, 0, sizeof(mplp_conf_t)); #define MPLP_PRINT_POS 0x4000 mplp.max_mq = 60; mplp.min_baseQ = 13; mplp.capQ_thres = 0; mplp.max_depth = 250; mplp.max_indel_depth = 250; mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100; mplp.min_frac = 0.002; mplp.min_support = 1; mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN; while ((c = getopt(argc, argv, "Agf:r:l:M:q:Q:uaRC:BDSd:L:b:P:o:e:h:Im:F:EG:6Os")) >= 0) { switch (c) { case 'f': mplp.fai = fai_load(optarg); if (mplp.fai == 0) return 1; break; case 'd': mplp.max_depth = atoi(optarg); break; case 'r': mplp.reg = strdup(optarg); break; case 'l': mplp.bed = bed_read(optarg); break; case 'P': mplp.pl_list = strdup(optarg); break; case 'g': mplp.flag |= MPLP_GLF; break; case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_GLF; break; case 'a': mplp.flag |= MPLP_NO_ORPHAN | MPLP_REALN; break; case 'B': mplp.flag &= ~MPLP_REALN; break; case 'D': mplp.flag |= MPLP_FMT_DP; break; case 'S': mplp.flag |= MPLP_FMT_SP; break; case 'I': mplp.flag |= MPLP_NO_INDEL; break; case 'E': mplp.flag |= MPLP_EXT_BAQ; break; case '6': mplp.flag |= MPLP_ILLUMINA13; break; case 'R': mplp.flag |= MPLP_IGNORE_RG; break; case 's': mplp.flag |= MPLP_PRINT_MAPQ; break; case 'O': mplp.flag |= MPLP_PRINT_POS; break; case 'C': mplp.capQ_thres = atoi(optarg); break; case 'M': mplp.max_mq = atoi(optarg); break; case 'q': mplp.min_mq = atoi(optarg); break; case 'Q': mplp.min_baseQ = atoi(optarg); break; case 'b': file_list = optarg; break; case 'o': mplp.openQ = atoi(optarg); break; case 'e': mplp.extQ = atoi(optarg); break; case 'h': mplp.tandemQ = atoi(optarg); break; case 'A': use_orphan = 1; break; case 'F': mplp.min_frac = atof(optarg); break; case 'm': mplp.min_support = atoi(optarg); break; case 'L': mplp.max_indel_depth = atoi(optarg); break; case 'G': { FILE *fp_rg; char buf[1024]; mplp.rghash = bcf_str2id_init(); if ((fp_rg = fopen(optarg, "r")) == 0) fprintf(stderr, "(%s) Fail to open file %s. Continue anyway.\n", __func__, optarg); while (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but forgive me... bcf_str2id_add(mplp.rghash, strdup(buf)); fclose(fp_rg); } break; } } if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN; if (argc == 1) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: samtools mpileup [options] in1.bam [in2.bam [...]]\n\n"); fprintf(stderr, "Input options:\n\n"); fprintf(stderr, " -6 assume the quality is in the Illumina-1.3+ encoding\n"); fprintf(stderr, " -A count anomalous read pairs\n"); fprintf(stderr, " -B disable BAQ computation\n"); fprintf(stderr, " -b FILE list of input BAM files [null]\n"); fprintf(stderr, " -C INT parameter for adjusting mapQ; 0 to disable [0]\n"); fprintf(stderr, " -d INT max per-BAM depth to avoid excessive memory usage [%d]\n", mplp.max_depth); fprintf(stderr, " -E extended BAQ for higher sensitivity but lower specificity\n"); fprintf(stderr, " -f FILE faidx indexed reference sequence file [null]\n"); fprintf(stderr, " -G FILE exclude read groups listed in FILE [null]\n"); fprintf(stderr, " -l FILE list of positions (chr pos) or regions (BED) [null]\n"); fprintf(stderr, " -M INT cap mapping quality at INT [%d]\n", mplp.max_mq); fprintf(stderr, " -r STR region in which pileup is generated [null]\n"); fprintf(stderr, " -R ignore RG tags\n"); fprintf(stderr, " -q INT skip alignments with mapQ smaller than INT [%d]\n", mplp.min_mq); fprintf(stderr, " -Q INT skip bases with baseQ/BAQ smaller than INT [%d]\n", mplp.min_baseQ); fprintf(stderr, "\nOutput options:\n\n"); fprintf(stderr, " -D output per-sample DP in BCF (require -g/-u)\n"); fprintf(stderr, " -g generate BCF output (genotype likelihoods)\n"); fprintf(stderr, " -O output base positions on reads (disabled by -g/-u)\n"); fprintf(stderr, " -s output mapping quality (disabled by -g/-u)\n"); fprintf(stderr, " -S output per-sample strand bias P-value in BCF (require -g/-u)\n"); fprintf(stderr, " -u generate uncompress BCF output\n"); fprintf(stderr, "\nSNP/INDEL genotype likelihoods options (effective with `-g' or `-u'):\n\n"); fprintf(stderr, " -e INT Phred-scaled gap extension seq error probability [%d]\n", mplp.extQ); fprintf(stderr, " -F FLOAT minimum fraction of gapped reads for candidates [%g]\n", mplp.min_frac); fprintf(stderr, " -h INT coefficient for homopolymer errors [%d]\n", mplp.tandemQ); fprintf(stderr, " -I do not perform indel calling\n"); fprintf(stderr, " -L INT max per-sample depth for INDEL calling [%d]\n", mplp.max_indel_depth); fprintf(stderr, " -m INT minimum gapped reads for indel candidates [%d]\n", mplp.min_support); fprintf(stderr, " -o INT Phred-scaled gap open sequencing error probability [%d]\n", mplp.openQ); fprintf(stderr, " -P STR comma separated list of platforms for indels [all]\n"); fprintf(stderr, "\n"); fprintf(stderr, "Notes: Assuming diploid individuals.\n\n"); return 1; } if (file_list) { if ( read_file_list(file_list,&nfiles,&fn) ) return 1; mpileup(&mplp,nfiles,fn); for (c=0; c<nfiles; c++) free(fn[c]); free(fn); } else mpileup(&mplp, argc - optind, argv + optind); if (mplp.rghash) bcf_str2id_thorough_destroy(mplp.rghash); free(mplp.reg); free(mplp.pl_list); if (mplp.fai) fai_destroy(mplp.fai); if (mplp.bed) bed_destroy(mplp.bed); return 0; }
int bam_mpileup(int argc, char *argv[]) { int c; const char *file_list = NULL; char **fn = NULL; int nfiles = 0, use_orphan = 0; mplp_conf_t mplp; memset(&mplp, 0, sizeof(mplp_conf_t)); mplp.min_baseQ = 13; mplp.capQ_thres = 0; mplp.max_depth = 250; mplp.max_indel_depth = 250; mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100; mplp.min_frac = 0.002; mplp.min_support = 1; mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_SMART_OVERLAPS; mplp.argc = argc; mplp.argv = argv; mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP; mplp.output_fname = NULL; static const struct option lopts[] = { {"rf", required_argument, NULL, 1}, // require flag {"ff", required_argument, NULL, 2}, // filter flag {"incl-flags", required_argument, NULL, 1}, {"excl-flags", required_argument, NULL, 2}, {"output", required_argument, NULL, 3}, {"open-prob", required_argument, NULL, 4}, {"illumina1.3+", no_argument, NULL, '6'}, {"count-orphans", no_argument, NULL, 'A'}, {"bam-list", required_argument, NULL, 'b'}, {"no-BAQ", no_argument, NULL, 'B'}, {"no-baq", no_argument, NULL, 'B'}, {"adjust-MQ", required_argument, NULL, 'C'}, {"adjust-mq", required_argument, NULL, 'C'}, {"max-depth", required_argument, NULL, 'd'}, {"redo-BAQ", no_argument, NULL, 'E'}, {"redo-baq", no_argument, NULL, 'E'}, {"fasta-ref", required_argument, NULL, 'f'}, {"exclude-RG", required_argument, NULL, 'G'}, {"exclude-rg", required_argument, NULL, 'G'}, {"positions", required_argument, NULL, 'l'}, {"region", required_argument, NULL, 'r'}, {"ignore-RG", no_argument, NULL, 'R'}, {"ignore-rg", no_argument, NULL, 'R'}, {"min-MQ", required_argument, NULL, 'q'}, {"min-mq", required_argument, NULL, 'q'}, {"min-BQ", required_argument, NULL, 'Q'}, {"min-bq", required_argument, NULL, 'Q'}, {"ignore-overlaps", no_argument, NULL, 'x'}, {"BCF", no_argument, NULL, 'g'}, {"bcf", no_argument, NULL, 'g'}, {"VCF", no_argument, NULL, 'v'}, {"vcf", no_argument, NULL, 'v'}, {"output-BP", no_argument, NULL, 'O'}, {"output-bp", no_argument, NULL, 'O'}, {"output-MQ", no_argument, NULL, 's'}, {"output-mq", no_argument, NULL, 's'}, {"output-tags", required_argument, NULL, 't'}, {"uncompressed", no_argument, NULL, 'u'}, {"ext-prob", required_argument, NULL, 'e'}, {"gap-frac", required_argument, NULL, 'F'}, {"tandem-qual", required_argument, NULL, 'h'}, {"skip-indels", no_argument, NULL, 'I'}, {"max-idepth", required_argument, NULL, 'L'}, {"min-ireads ", required_argument, NULL, 'm'}, {"per-sample-mF", no_argument, NULL, 'p'}, {"per-sample-mf", no_argument, NULL, 'p'}, {"platforms", required_argument, NULL, 'P'}, {NULL, 0, NULL, 0} }; while ((c = getopt_long(argc, argv, "Agf:r:l:q:Q:uRC:BDSd:L:b:P:po:e:h:Im:F:EG:6OsVvxt:",lopts,NULL)) >= 0) { switch (c) { case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break; case 1 : mplp.rflag_require = bam_str2flag(optarg); if ( mplp.rflag_require<0 ) { fprintf(stderr,"Could not parse --rf %s\n", optarg); return 1; } break; case 2 : mplp.rflag_filter = bam_str2flag(optarg); if ( mplp.rflag_filter<0 ) { fprintf(stderr,"Could not parse --ff %s\n", optarg); return 1; } break; case 3 : mplp.output_fname = optarg; break; case 4 : mplp.openQ = atoi(optarg); break; case 'f': mplp.fai = fai_load(optarg); if (mplp.fai == 0) return 1; mplp.fai_fname = optarg; break; case 'd': mplp.max_depth = atoi(optarg); break; case 'r': mplp.reg = strdup(optarg); break; case 'l': // In the original version the whole BAM was streamed which is inefficient // with few BED intervals and big BAMs. Todo: devise a heuristic to determine // best strategy, that is streaming or jumping. mplp.bed = bed_read(optarg); if (!mplp.bed) { print_error_errno("Could not read file \"%s\"", optarg); return 1; } break; case 'P': mplp.pl_list = strdup(optarg); break; case 'p': mplp.flag |= MPLP_PER_SAMPLE; break; case 'g': mplp.flag |= MPLP_BCF; break; case 'v': mplp.flag |= MPLP_BCF | MPLP_VCF; break; case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_BCF; break; case 'B': mplp.flag &= ~MPLP_REALN; break; case 'D': mplp.fmt_flag |= B2B_FMT_DP; fprintf(stderr, "[warning] samtools mpileup option `-D` is functional, but deprecated. Please switch to `-t DP` in future.\n"); break; case 'S': mplp.fmt_flag |= B2B_FMT_SP; fprintf(stderr, "[warning] samtools mpileup option `-S` is functional, but deprecated. Please switch to `-t SP` in future.\n"); break; case 'V': mplp.fmt_flag |= B2B_FMT_DV; fprintf(stderr, "[warning] samtools mpileup option `-V` is functional, but deprecated. Please switch to `-t DV` in future.\n"); break; case 'I': mplp.flag |= MPLP_NO_INDEL; break; case 'E': mplp.flag |= MPLP_REDO_BAQ; break; case '6': mplp.flag |= MPLP_ILLUMINA13; break; case 'R': mplp.flag |= MPLP_IGNORE_RG; break; case 's': mplp.flag |= MPLP_PRINT_MAPQ; break; case 'O': mplp.flag |= MPLP_PRINT_POS; break; case 'C': mplp.capQ_thres = atoi(optarg); break; case 'q': mplp.min_mq = atoi(optarg); break; case 'Q': mplp.min_baseQ = atoi(optarg); break; case 'b': file_list = optarg; break; case 'o': { char *end; long value = strtol(optarg, &end, 10); // Distinguish between -o INT and -o FILE (a bit of a hack!) if (*end == '\0') mplp.openQ = value; else mplp.output_fname = optarg; } break; case 'e': mplp.extQ = atoi(optarg); break; case 'h': mplp.tandemQ = atoi(optarg); break; case 'A': use_orphan = 1; break; case 'F': mplp.min_frac = atof(optarg); break; case 'm': mplp.min_support = atoi(optarg); break; case 'L': mplp.max_indel_depth = atoi(optarg); break; case 'G': { FILE *fp_rg; char buf[1024]; mplp.rghash = khash_str2int_init(); if ((fp_rg = fopen(optarg, "r")) == 0) fprintf(stderr, "(%s) Fail to open file %s. Continue anyway.\n", __func__, optarg); while (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but forgive me... khash_str2int_inc(mplp.rghash, strdup(buf)); fclose(fp_rg); } break; case 't': mplp.fmt_flag |= parse_format_flag(optarg); break; default: fprintf(stderr,"Invalid option: '%c'\n", c); return 1; } } if ( !(mplp.flag&MPLP_REALN) && mplp.flag&MPLP_REDO_BAQ ) { fprintf(stderr,"Error: The -B option cannot be combined with -E\n"); return 1; } if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN; if (argc == 1) { print_usage(stderr, &mplp); return 1; } int ret; if (file_list) { if ( read_file_list(file_list,&nfiles,&fn) ) return 1; ret = mpileup(&mplp,nfiles,fn); for (c=0; c<nfiles; c++) free(fn[c]); free(fn); } else ret = mpileup(&mplp, argc - optind, argv + optind); if (mplp.rghash) khash_str2int_destroy_free(mplp.rghash); free(mplp.reg); free(mplp.pl_list); if (mplp.fai) fai_destroy(mplp.fai); if (mplp.bed) bed_destroy(mplp.bed); return ret; }
int bam_mpileup(int argc, char *argv[]) { int c; const char *file_list = NULL; char **fn = NULL; int nfiles = 0, use_orphan = 0, noref = 0; mplp_conf_t mplp; memset(&mplp, 0, sizeof(mplp_conf_t)); mplp.min_baseQ = 13; mplp.capQ_thres = 0; mplp.max_depth = 250; mplp.max_indel_depth = 250; mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100; mplp.min_frac = 0.002; mplp.min_support = 1; mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_SMART_OVERLAPS; mplp.argc = argc; mplp.argv = argv; mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP; mplp.output_fname = NULL; mplp.output_type = FT_VCF; mplp.record_cmd_line = 1; mplp.n_threads = 0; mplp.bsmpl = bam_smpl_init(); static const struct option lopts[] = { {"rf", required_argument, NULL, 1}, // require flag {"ff", required_argument, NULL, 2}, // filter flag {"incl-flags", required_argument, NULL, 1}, {"excl-flags", required_argument, NULL, 2}, {"output", required_argument, NULL, 3}, {"open-prob", required_argument, NULL, 4}, {"ignore-RG", no_argument, NULL, 5}, {"ignore-rg", no_argument, NULL, 5}, {"gvcf", required_argument, NULL, 'g'}, {"no-reference", no_argument, NULL, 7}, {"no-version", no_argument, NULL, 8}, {"threads",required_argument,NULL,9}, {"illumina1.3+", no_argument, NULL, '6'}, {"count-orphans", no_argument, NULL, 'A'}, {"bam-list", required_argument, NULL, 'b'}, {"no-BAQ", no_argument, NULL, 'B'}, {"no-baq", no_argument, NULL, 'B'}, {"adjust-MQ", required_argument, NULL, 'C'}, {"adjust-mq", required_argument, NULL, 'C'}, {"max-depth", required_argument, NULL, 'd'}, {"redo-BAQ", no_argument, NULL, 'E'}, {"redo-baq", no_argument, NULL, 'E'}, {"fasta-ref", required_argument, NULL, 'f'}, {"read-groups", required_argument, NULL, 'G'}, {"region", required_argument, NULL, 'r'}, {"regions", required_argument, NULL, 'r'}, {"regions-file", required_argument, NULL, 'R'}, {"targets", required_argument, NULL, 't'}, {"targets-file", required_argument, NULL, 'T'}, {"min-MQ", required_argument, NULL, 'q'}, {"min-mq", required_argument, NULL, 'q'}, {"min-BQ", required_argument, NULL, 'Q'}, {"min-bq", required_argument, NULL, 'Q'}, {"ignore-overlaps", no_argument, NULL, 'x'}, {"output-type", required_argument, NULL, 'O'}, {"samples", required_argument, NULL, 's'}, {"samples-file", required_argument, NULL, 'S'}, {"annotate", required_argument, NULL, 'a'}, {"ext-prob", required_argument, NULL, 'e'}, {"gap-frac", required_argument, NULL, 'F'}, {"tandem-qual", required_argument, NULL, 'h'}, {"skip-indels", no_argument, NULL, 'I'}, {"max-idepth", required_argument, NULL, 'L'}, {"min-ireads ", required_argument, NULL, 'm'}, {"per-sample-mF", no_argument, NULL, 'p'}, {"per-sample-mf", no_argument, NULL, 'p'}, {"platforms", required_argument, NULL, 'P'}, {NULL, 0, NULL, 0} }; while ((c = getopt_long(argc, argv, "Ag:f:r:R:q:Q:C:Bd:L:b:P:po:e:h:Im:F:EG:6O:xa:s:S:t:T:",lopts,NULL)) >= 0) { switch (c) { case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break; case 1 : mplp.rflag_require = bam_str2flag(optarg); if ( mplp.rflag_require<0 ) { fprintf(stderr,"Could not parse --rf %s\n", optarg); return 1; } break; case 2 : mplp.rflag_filter = bam_str2flag(optarg); if ( mplp.rflag_filter<0 ) { fprintf(stderr,"Could not parse --ff %s\n", optarg); return 1; } break; case 3 : mplp.output_fname = optarg; break; case 4 : mplp.openQ = atoi(optarg); break; case 5 : bam_smpl_ignore_readgroups(mplp.bsmpl); break; case 'g': mplp.gvcf = gvcf_init(optarg); if ( !mplp.gvcf ) error("Could not parse: --gvcf %s\n", optarg); break; case 'f': mplp.fai = fai_load(optarg); if (mplp.fai == NULL) return 1; mplp.fai_fname = optarg; break; case 7 : noref = 1; break; case 8 : mplp.record_cmd_line = 0; break; case 9 : mplp.n_threads = strtol(optarg, 0, 0); break; case 'd': mplp.max_depth = atoi(optarg); break; case 'r': mplp.reg_fname = strdup(optarg); break; case 'R': mplp.reg_fname = strdup(optarg); mplp.reg_is_file = 1; break; case 't': // In the original version the whole BAM was streamed which is inefficient // with few BED intervals and big BAMs. Todo: devise a heuristic to determine // best strategy, that is streaming or jumping. if ( optarg[0]=='^' ) optarg++; else mplp.bed_logic = 1; mplp.bed = regidx_init(NULL,regidx_parse_reg,NULL,0,NULL); mplp.bed_itr = regitr_init(mplp.bed); if ( regidx_insert_list(mplp.bed,optarg,',') !=0 ) { fprintf(stderr,"Could not parse the targets: %s\n", optarg); exit(EXIT_FAILURE); } break; case 'T': if ( optarg[0]=='^' ) optarg++; else mplp.bed_logic = 1; mplp.bed = regidx_init(optarg,NULL,NULL,0,NULL); if (!mplp.bed) { fprintf(stderr, "bcftools mpileup: Could not read file \"%s\"", optarg); return 1; } break; case 'P': mplp.pl_list = strdup(optarg); break; case 'p': mplp.flag |= MPLP_PER_SAMPLE; break; case 'B': mplp.flag &= ~MPLP_REALN; break; case 'I': mplp.flag |= MPLP_NO_INDEL; break; case 'E': mplp.flag |= MPLP_REDO_BAQ; break; case '6': mplp.flag |= MPLP_ILLUMINA13; break; case 's': if ( bam_smpl_add_samples(mplp.bsmpl,optarg,0)<0 ) error("Could not read samples: %s\n",optarg); break; case 'S': if ( bam_smpl_add_samples(mplp.bsmpl,optarg,1)<0 ) error("Could not read samples: %s\n",optarg); break; case 'O': switch (optarg[0]) { case 'b': mplp.output_type = FT_BCF_GZ; break; case 'u': mplp.output_type = FT_BCF; break; case 'z': mplp.output_type = FT_VCF_GZ; break; case 'v': mplp.output_type = FT_VCF; break; default: error("[error] The option \"-O\" changed meaning when mpileup moved to bcftools. Did you mean: \"bcftools mpileup --output-type\" or \"samtools mpileup --output-BP\"?\n"); } break; case 'C': mplp.capQ_thres = atoi(optarg); break; case 'q': mplp.min_mq = atoi(optarg); break; case 'Q': mplp.min_baseQ = atoi(optarg); break; case 'b': file_list = optarg; break; case 'o': { char *end; long value = strtol(optarg, &end, 10); // Distinguish between -o INT and -o FILE (a bit of a hack!) if (*end == '\0') mplp.openQ = value; else mplp.output_fname = optarg; } break; case 'e': mplp.extQ = atoi(optarg); break; case 'h': mplp.tandemQ = atoi(optarg); break; case 'A': use_orphan = 1; break; case 'F': mplp.min_frac = atof(optarg); break; case 'm': mplp.min_support = atoi(optarg); break; case 'L': mplp.max_indel_depth = atoi(optarg); break; case 'G': bam_smpl_add_readgroups(mplp.bsmpl, optarg, 1); break; case 'a': if (optarg[0]=='?') { list_annotations(stderr); return 1; } mplp.fmt_flag |= parse_format_flag(optarg); break; default: fprintf(stderr,"Invalid option: '%c'\n", c); return 1; } } if ( mplp.gvcf && !(mplp.fmt_flag&B2B_FMT_DP) ) { fprintf(stderr,"[warning] The -t DP option is required with --gvcf, switching on.\n"); mplp.fmt_flag |= B2B_FMT_DP; } if ( mplp.flag&(MPLP_BCF|MPLP_VCF|MPLP_NO_COMP) ) { if ( mplp.flag&MPLP_VCF ) { if ( mplp.flag&MPLP_NO_COMP ) mplp.output_type = FT_VCF; else mplp.output_type = FT_VCF_GZ; } else if ( mplp.flag&MPLP_BCF ) { if ( mplp.flag&MPLP_NO_COMP ) mplp.output_type = FT_BCF; else mplp.output_type = FT_BCF_GZ; } } if ( !(mplp.flag&MPLP_REALN) && mplp.flag&MPLP_REDO_BAQ ) { fprintf(stderr,"Error: The -B option cannot be combined with -E\n"); return 1; } if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN; if (argc == 1) { print_usage(stderr, &mplp); return 1; } if (!mplp.fai && !noref) { fprintf(stderr,"Error: mpileup requires the --fasta-ref option by default; use --no-reference to run without a fasta reference\n"); return 1; } int ret,i; if (file_list) { if ( read_file_list(file_list,&nfiles,&fn) ) return 1; mplp.files = fn; mplp.nfiles = nfiles; } else { mplp.nfiles = argc - optind; mplp.files = (char**) malloc(mplp.nfiles*sizeof(char*)); for (i=0; i<mplp.nfiles; i++) mplp.files[i] = strdup(argv[optind+i]); } ret = mpileup(&mplp); for (i=0; i<mplp.nfiles; i++) free(mplp.files[i]); free(mplp.files); free(mplp.reg_fname); free(mplp.pl_list); if (mplp.fai) fai_destroy(mplp.fai); if (mplp.bed) regidx_destroy(mplp.bed); if (mplp.bed_itr) regitr_destroy(mplp.bed_itr); if (mplp.reg) regidx_destroy(mplp.reg); bam_smpl_destroy(mplp.bsmpl); return ret; }
int main_uniq(int argc, char *argv[]) { int c, i; char *bam_file = NULL; char *vcf_in = NULL; /* - == stdout */ char *vcf_out = NULL; /* - == stdout */ mplp_conf_t mplp_conf; uniq_conf_t uniq_conf; void (*plp_proc_func)(const plp_col_t*, void*); int rc = 0; var_t **vars = NULL; int num_vars = 0; char *vcf_header = NULL; static int use_det_lim = 0; static int use_orphan = 0; static int output_all = 0; static int is_somatic = 0; /* default uniq options */ memset(&uniq_conf, 0, sizeof(uniq_conf_t)); uniq_conf.uni_freq = DEFAULT_UNI_FREQ; uniq_conf.use_det_lim = 0; uniq_conf.uniq_filter.mtc_type = MTC_FDR; uniq_conf.uniq_filter.alpha = 0.001; /* default pileup options */ memset(&mplp_conf, 0, sizeof(mplp_conf_t)); mplp_conf.max_mq = DEFAULT_MAX_MQ; mplp_conf.min_mq = 1; mplp_conf.min_plp_bq = DEFAULT_MIN_PLP_BQ; mplp_conf.max_depth = DEFAULT_MAX_PLP_DEPTH; mplp_conf.flag = MPLP_NO_ORPHAN; /* keep in sync with long_opts_str and usage * * getopt is a pain in the whole when it comes to syncing of long * and short args and usage. check out gopt, libcfu... */ while (1) { static struct option long_opts[] = { /* see usage sync */ {"help", no_argument, NULL, 'h'}, {"verbose", no_argument, &verbose, 1}, {"debug", no_argument, &debug, 1}, {"use-det-lim", no_argument, &use_det_lim, 1}, {"use-orphan", no_argument, &use_orphan, 1}, {"output-all", no_argument, &output_all, 1}, {"is-somatic", no_argument, &is_somatic, 1}, {"vcf-in", required_argument, NULL, 'v'}, {"vcf-out", required_argument, NULL, 'o'}, {"uni-freq", required_argument, NULL, 'f'}, {"uniq-thresh", required_argument, NULL, 't'}, {"uniq-mtc", required_argument, NULL, 'm'}, {"uniq-alpha", required_argument, NULL, 'a'}, {"uniq-ntests", required_argument, NULL, 'n'}, {0, 0, 0, 0} /* sentinel */ }; /* keep in sync with long_opts and usage */ static const char *long_opts_str = "hv:o:f:t:m:a:n:"; /* getopt_long stores the option index here. */ int long_opts_index = 0; c = getopt_long(argc-1, argv+1, /* skipping 'lofreq', just leaving 'command', i.e. call */ long_opts_str, long_opts, & long_opts_index); if (c == -1) { break; } switch (c) { /* keep in sync with long_opts etc */ case 'h': usage(& uniq_conf); return 0; case 'v': if (0 != strcmp(optarg, "-")) { if (! file_exists(optarg)) { LOG_FATAL("Input file '%s' does not exist. Exiting...\n", optarg); return 1; } } vcf_in = strdup(optarg); break; case 'o': if (0 != strcmp(optarg, "-")) { if (file_exists(optarg)) { LOG_FATAL("Cowardly refusing to overwrite file '%s'. Exiting...\n", optarg); return 1; } } vcf_out = strdup(optarg); break; case 'f': uniq_conf.uni_freq = strtof(optarg, (char **)NULL); /* atof */ if (uniq_conf.uni_freq<=0) { LOG_WARN("%s\n", "Ignoring uni-freq option"); } if (uniq_conf.uni_freq>1.0) { LOG_FATAL("%s\n", "Value for uni-freq has to be <1.0"); return 1; } break; case 't': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } uniq_conf.uniq_filter.thresh = atoi(optarg); uniq_conf.uniq_filter.mtc_type = MTC_NONE; break; case 'm': uniq_conf.uniq_filter.mtc_type = mtc_str_to_type(optarg); if (-1 == uniq_conf.uniq_filter.mtc_type) { LOG_FATAL("Unknown multiple testing correction type '%s' for snv quality filtering\n", optarg); return -1; } break; case 'a': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } uniq_conf.uniq_filter.alpha = strtof(optarg, NULL); break; case 'n': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } uniq_conf.uniq_filter.ntests = atol(optarg); break; case '?': LOG_FATAL("%s\n", "unrecognized arguments found. Exiting...\n"); return 1; default: break; } } if (use_orphan) { mplp_conf.flag &= ~MPLP_NO_ORPHAN; } if (debug) { dump_mplp_conf(& mplp_conf, stderr); } uniq_conf.output_all = output_all; uniq_conf.use_det_lim = use_det_lim; #if DEBUG LOG_DEBUG("uniq_conf.uniq_filter.thresh = %d\n", uniq_conf.uniq_filter.thresh); LOG_DEBUG("uniq_conf.uniq_filter.mtc_type = %d\n", uniq_conf.uniq_filter.mtc_type); LOG_DEBUG("uniq_conf.uniq_filter.alpha = %f\n", uniq_conf.uniq_filter.alpha); LOG_DEBUG("uniq_conf.uniq_filter.ntests = %d\n", uniq_conf.uniq_filter.ntests); #endif if (uniq_conf.uniq_filter.thresh && uniq_conf.uniq_filter.mtc_type != MTC_NONE) { LOG_FATAL("%s\n", "Can't use fixed Unique quality threshold *and* multiple testing correction."); return 1; } if (argc == 2) { fprintf(stderr, "\n"); usage(& uniq_conf); return 1; } if (1 != argc - optind - 1) { fprintf(stderr, "Need exactly one BAM file as last argument\n"); return 1; } bam_file = (argv + optind + 1)[0]; if (! file_exists(bam_file)) { LOG_FATAL("BAM file %s does not exist. Exiting...\n", bam_file); return -1; } if (! vcf_in) { #if 0 vcf_in = malloc(2 * sizeof(char)); strcpy(vcf_in, "-"); #else LOG_FATAL("%s\n", "No input vcf specified. Exiting..."); return -1; #endif } if (! vcf_out) { vcf_out = malloc(2 * sizeof(char)); strcpy(vcf_out, "-"); } if (vcf_file_open(& uniq_conf.vcf_in, vcf_in, HAS_GZIP_EXT(vcf_in), 'r')) { LOG_ERROR("Couldn't open %s\n", vcf_in); return 1; } if (vcf_file_open(& uniq_conf.vcf_out, vcf_out, HAS_GZIP_EXT(vcf_out), 'w')) { LOG_ERROR("Couldn't open %s\n", vcf_out); return 1; } if (0 != vcf_parse_header(&vcf_header, & uniq_conf.vcf_in)) { LOG_WARN("%s\n", "vcf_parse_header() failed. trying to rewind to start..."); if (vcf_file_seek(& uniq_conf.vcf_in, 0, SEEK_SET)) { LOG_FATAL("%s\n", "Couldn't rewind file to parse variants" " after header parsing failed"); return 1; } } else { vcf_header_add(&vcf_header, "##INFO=<ID=UNIQ,Number=0,Type=Flag,Description=\"Unique, i.e. not detectable in paired sample\">\n"); vcf_header_add(&vcf_header, "##INFO=<ID=UQ,Number=1,Type=Integer,Description=\"Phred-scaled uniq score at this position\">\n"); if (is_somatic) { vcf_header_add(&vcf_header, "##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description=\"Somatic event\">\n"); } if (! uniq_conf.use_det_lim) { char full_filter_str[FILTER_STRSIZE]; if (uniq_conf.uniq_filter.thresh > 0) { snprintf(uniq_conf.uniq_filter.id, FILTER_ID_STRSIZE, "min_uq_%d", uniq_conf.uniq_filter.thresh); snprintf(full_filter_str, FILTER_STRSIZE, "##FILTER=<ID=%s,Description=\"Minimum Uniq Phred %d\">\n", uniq_conf.uniq_filter.id, uniq_conf.uniq_filter.thresh); vcf_header_add(&vcf_header, full_filter_str); } else if (uniq_conf.uniq_filter.mtc_type != MTC_NONE) { char buf[64]; mtc_str(buf, uniq_conf.uniq_filter.mtc_type); snprintf(uniq_conf.uniq_filter.id, FILTER_ID_STRSIZE, "uq_%s", buf); snprintf(full_filter_str, FILTER_STRSIZE, "##FILTER=<ID=%s,Description=\"Uniq Multiple Testing Correction: %s corr. pvalue < %f\">\n", uniq_conf.uniq_filter.id, buf, uniq_conf.uniq_filter.alpha); vcf_header_add(& vcf_header, full_filter_str); } } vcf_write_header(& uniq_conf.vcf_out, vcf_header); free(vcf_header); } num_vars = vcf_parse_vars(&vars, & uniq_conf.vcf_in, 1); if (0 == num_vars) { LOG_WARN("%s\n", "Didn't find any variants in input"); goto clean_and_exit; } if (! uniq_conf.uniq_filter.ntests) { uniq_conf.uniq_filter.ntests = num_vars; } plp_proc_func = &uniq_snv; for (i=0; i<num_vars; i++) { char reg_buf[BUF_SIZE]; if (i%100==0) { LOG_VERBOSE("Processing variant %d of %d\n", i+1, num_vars); } uniq_conf.var = vars[i]; snprintf(reg_buf, BUF_SIZE, "%s:%ld-%ld", vars[i]->chrom, vars[i]->pos+1, vars[i]->pos+1); mplp_conf.reg = strdup(reg_buf); LOG_DEBUG("pileup for var no %d at %s %d\n", i+1, uniq_conf.var->chrom, uniq_conf.var->pos+1); #ifdef DISABLE_INDELS if (vcf_var_has_info_key(NULL, uniq_conf.var, "INDEL")) { LOG_WARN("Skipping indel var at %s %d\n", uniq_conf.var->chrom, uniq_conf.var->pos+1); free(mplp_conf.reg); mplp_conf.reg = NULL; continue; } #endif /* no need to check for filter because done by parse_vars */ rc = mpileup(&mplp_conf, plp_proc_func, (void*)&uniq_conf, 1, (const char **) argv + optind + 1); if (uniq_conf.uniq_filter.thresh) { apply_uniq_threshold(uniq_conf.var, & uniq_conf.uniq_filter); } free(mplp_conf.reg); mplp_conf.reg = NULL; } uniq_conf.var = NULL;/* just be sure to not use it accidentally again */ /* print whatever we've got. there's no UQ to test or we * are supposed to print all */ if (uniq_conf.use_det_lim) { for (i=0; i<num_vars; i++) { var_t *var = vars[i]; vcf_write_var(& uniq_conf.vcf_out, var); } /* all done */ goto clean_and_exit; } if (uniq_conf.uniq_filter.mtc_type != MTC_NONE) { if (apply_uniq_filter_mtc(& uniq_conf.uniq_filter, vars, num_vars)) { LOG_FATAL("%s\n", "Multiple testing correction on uniq pvalues failed"); return -1; } } for (i=0; i<num_vars; i++) { var_t *var = vars[i]; if (VCF_VAR_PASSES(var) || uniq_conf.output_all) { vcf_write_var(& uniq_conf.vcf_out, var); } } clean_and_exit: vcf_file_close(& uniq_conf.vcf_in); vcf_file_close(& uniq_conf.vcf_out); for (i=0; i<num_vars; i++) { vcf_free_var(& vars[i]); } free(vars); free(vcf_in); free(vcf_out); if (0==rc) { LOG_VERBOSE("%s\n", "Successful exit."); } /* LOG_FIXME("%s\n", "allow user setting of -S and -J. Currently just using default") */ return rc; }