Exemple #1
0
int bam_mpileup(int argc, char *argv[])
{
	int c;
    const char *file_list = NULL;
    char **fn = NULL;
    int nfiles = 0, use_orphan = 0;
	mplp_conf_t mplp;
	memset(&mplp, 0, sizeof(mplp_conf_t));
	#define MPLP_PRINT_POS 0x4000
	mplp.max_mq = 60;
	mplp.min_baseQ = 13;
	mplp.capQ_thres = 0;
	mplp.max_depth = 250; mplp.max_indel_depth = 250;
	mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100;
	mplp.min_frac = 0.002; mplp.min_support = 1;
	mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN;
	while ((c = getopt(argc, argv, "Agf:r:l:M:q:Q:uaRC:BDSd:L:b:P:o:e:h:Im:F:EG:6Os")) >= 0) {
		switch (c) {
		case 'f':
			mplp.fai = fai_load(optarg);
			if (mplp.fai == 0) return 1;
			break;
		case 'd': mplp.max_depth = atoi(optarg); break;
		case 'r': mplp.reg = strdup(optarg); break;
		case 'l': mplp.bed = bed_read(optarg); break;
		case 'P': mplp.pl_list = strdup(optarg); break;
		case 'g': mplp.flag |= MPLP_GLF; break;
		case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_GLF; break;
		case 'a': mplp.flag |= MPLP_NO_ORPHAN | MPLP_REALN; break;
		case 'B': mplp.flag &= ~MPLP_REALN; break;
		case 'D': mplp.flag |= MPLP_FMT_DP; break;
		case 'S': mplp.flag |= MPLP_FMT_SP; break;
		case 'I': mplp.flag |= MPLP_NO_INDEL; break;
		case 'E': mplp.flag |= MPLP_EXT_BAQ; break;
		case '6': mplp.flag |= MPLP_ILLUMINA13; break;
		case 'R': mplp.flag |= MPLP_IGNORE_RG; break;
		case 's': mplp.flag |= MPLP_PRINT_MAPQ; break;
		case 'O': mplp.flag |= MPLP_PRINT_POS; break;
		case 'C': mplp.capQ_thres = atoi(optarg); break;
		case 'M': mplp.max_mq = atoi(optarg); break;
		case 'q': mplp.min_mq = atoi(optarg); break;
		case 'Q': mplp.min_baseQ = atoi(optarg); break;
        case 'b': file_list = optarg; break;
		case 'o': mplp.openQ = atoi(optarg); break;
		case 'e': mplp.extQ = atoi(optarg); break;
		case 'h': mplp.tandemQ = atoi(optarg); break;
		case 'A': use_orphan = 1; break;
		case 'F': mplp.min_frac = atof(optarg); break;
		case 'm': mplp.min_support = atoi(optarg); break;
		case 'L': mplp.max_indel_depth = atoi(optarg); break;
		case 'G': {
				FILE *fp_rg;
				char buf[1024];
				mplp.rghash = bcf_str2id_init();
				if ((fp_rg = fopen(optarg, "r")) == 0)
					fprintf(stderr, "(%s) Fail to open file %s. Continue anyway.\n", __func__, optarg);
				while (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but forgive me...
					bcf_str2id_add(mplp.rghash, strdup(buf));
				fclose(fp_rg);
			}
			break;
		}
	}
	if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN;
	if (argc == 1) {
		fprintf(stderr, "\n");
		fprintf(stderr, "Usage: samtools mpileup [options] in1.bam [in2.bam [...]]\n\n");
		fprintf(stderr, "Input options:\n\n");
		fprintf(stderr, "       -6           assume the quality is in the Illumina-1.3+ encoding\n");
		fprintf(stderr, "       -A           count anomalous read pairs\n");
		fprintf(stderr, "       -B           disable BAQ computation\n");
		fprintf(stderr, "       -b FILE      list of input BAM files [null]\n");
		fprintf(stderr, "       -C INT       parameter for adjusting mapQ; 0 to disable [0]\n");
		fprintf(stderr, "       -d INT       max per-BAM depth to avoid excessive memory usage [%d]\n", mplp.max_depth);
		fprintf(stderr, "       -E           extended BAQ for higher sensitivity but lower specificity\n");
		fprintf(stderr, "       -f FILE      faidx indexed reference sequence file [null]\n");
		fprintf(stderr, "       -G FILE      exclude read groups listed in FILE [null]\n");
		fprintf(stderr, "       -l FILE      list of positions (chr pos) or regions (BED) [null]\n");
		fprintf(stderr, "       -M INT       cap mapping quality at INT [%d]\n", mplp.max_mq);
		fprintf(stderr, "       -r STR       region in which pileup is generated [null]\n");
		fprintf(stderr, "       -R           ignore RG tags\n");
		fprintf(stderr, "       -q INT       skip alignments with mapQ smaller than INT [%d]\n", mplp.min_mq);
		fprintf(stderr, "       -Q INT       skip bases with baseQ/BAQ smaller than INT [%d]\n", mplp.min_baseQ);
		fprintf(stderr, "\nOutput options:\n\n");
		fprintf(stderr, "       -D           output per-sample DP in BCF (require -g/-u)\n");
		fprintf(stderr, "       -g           generate BCF output (genotype likelihoods)\n");
		fprintf(stderr, "       -O           output base positions on reads (disabled by -g/-u)\n");
		fprintf(stderr, "       -s           output mapping quality (disabled by -g/-u)\n");
		fprintf(stderr, "       -S           output per-sample strand bias P-value in BCF (require -g/-u)\n");
		fprintf(stderr, "       -u           generate uncompress BCF output\n");
		fprintf(stderr, "\nSNP/INDEL genotype likelihoods options (effective with `-g' or `-u'):\n\n");
		fprintf(stderr, "       -e INT       Phred-scaled gap extension seq error probability [%d]\n", mplp.extQ);
		fprintf(stderr, "       -F FLOAT     minimum fraction of gapped reads for candidates [%g]\n", mplp.min_frac);
		fprintf(stderr, "       -h INT       coefficient for homopolymer errors [%d]\n", mplp.tandemQ);
		fprintf(stderr, "       -I           do not perform indel calling\n");
		fprintf(stderr, "       -L INT       max per-sample depth for INDEL calling [%d]\n", mplp.max_indel_depth);
		fprintf(stderr, "       -m INT       minimum gapped reads for indel candidates [%d]\n", mplp.min_support);
		fprintf(stderr, "       -o INT       Phred-scaled gap open sequencing error probability [%d]\n", mplp.openQ);
		fprintf(stderr, "       -P STR       comma separated list of platforms for indels [all]\n");
		fprintf(stderr, "\n");
		fprintf(stderr, "Notes: Assuming diploid individuals.\n\n");
		return 1;
	}
    if (file_list) {
        if ( read_file_list(file_list,&nfiles,&fn) ) return 1;
        mpileup(&mplp,nfiles,fn);
        for (c=0; c<nfiles; c++) free(fn[c]);
        free(fn);
    } else mpileup(&mplp, argc - optind, argv + optind);
	if (mplp.rghash) bcf_str2id_thorough_destroy(mplp.rghash);
	free(mplp.reg); free(mplp.pl_list);
	if (mplp.fai) fai_destroy(mplp.fai);
	if (mplp.bed) bed_destroy(mplp.bed);
	return 0;
}
Exemple #2
0
int bam_mpileup(int argc, char *argv[])
{
    int c;
    const char *file_list = NULL;
    char **fn = NULL;
    int nfiles = 0, use_orphan = 0;
    mplp_conf_t mplp;
    memset(&mplp, 0, sizeof(mplp_conf_t));
    mplp.min_baseQ = 13;
    mplp.capQ_thres = 0;
    mplp.max_depth = 250; mplp.max_indel_depth = 250;
    mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100;
    mplp.min_frac = 0.002; mplp.min_support = 1;
    mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_SMART_OVERLAPS;
    mplp.argc = argc; mplp.argv = argv;
    mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP;
    mplp.output_fname = NULL;
    static const struct option lopts[] =
    {
        {"rf", required_argument, NULL, 1},   // require flag
        {"ff", required_argument, NULL, 2},   // filter flag
        {"incl-flags", required_argument, NULL, 1},
        {"excl-flags", required_argument, NULL, 2},
        {"output", required_argument, NULL, 3},
        {"open-prob", required_argument, NULL, 4},
        {"illumina1.3+", no_argument, NULL, '6'},
        {"count-orphans", no_argument, NULL, 'A'},
        {"bam-list", required_argument, NULL, 'b'},
        {"no-BAQ", no_argument, NULL, 'B'},
        {"no-baq", no_argument, NULL, 'B'},
        {"adjust-MQ", required_argument, NULL, 'C'},
        {"adjust-mq", required_argument, NULL, 'C'},
        {"max-depth", required_argument, NULL, 'd'},
        {"redo-BAQ", no_argument, NULL, 'E'},
        {"redo-baq", no_argument, NULL, 'E'},
        {"fasta-ref", required_argument, NULL, 'f'},
        {"exclude-RG", required_argument, NULL, 'G'},
        {"exclude-rg", required_argument, NULL, 'G'},
        {"positions", required_argument, NULL, 'l'},
        {"region", required_argument, NULL, 'r'},
        {"ignore-RG", no_argument, NULL, 'R'},
        {"ignore-rg", no_argument, NULL, 'R'},
        {"min-MQ", required_argument, NULL, 'q'},
        {"min-mq", required_argument, NULL, 'q'},
        {"min-BQ", required_argument, NULL, 'Q'},
        {"min-bq", required_argument, NULL, 'Q'},
        {"ignore-overlaps", no_argument, NULL, 'x'},
        {"BCF", no_argument, NULL, 'g'},
        {"bcf", no_argument, NULL, 'g'},
        {"VCF", no_argument, NULL, 'v'},
        {"vcf", no_argument, NULL, 'v'},
        {"output-BP", no_argument, NULL, 'O'},
        {"output-bp", no_argument, NULL, 'O'},
        {"output-MQ", no_argument, NULL, 's'},
        {"output-mq", no_argument, NULL, 's'},
        {"output-tags", required_argument, NULL, 't'},
        {"uncompressed", no_argument, NULL, 'u'},
        {"ext-prob", required_argument, NULL, 'e'},
        {"gap-frac", required_argument, NULL, 'F'},
        {"tandem-qual", required_argument, NULL, 'h'},
        {"skip-indels", no_argument, NULL, 'I'},
        {"max-idepth", required_argument, NULL, 'L'},
        {"min-ireads ", required_argument, NULL, 'm'},
        {"per-sample-mF", no_argument, NULL, 'p'},
        {"per-sample-mf", no_argument, NULL, 'p'},
        {"platforms", required_argument, NULL, 'P'},
        {NULL, 0, NULL, 0}
    };
    while ((c = getopt_long(argc, argv, "Agf:r:l:q:Q:uRC:BDSd:L:b:P:po:e:h:Im:F:EG:6OsVvxt:",lopts,NULL)) >= 0) {
        switch (c) {
        case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break;
        case  1 :
            mplp.rflag_require = bam_str2flag(optarg);
            if ( mplp.rflag_require<0 ) { fprintf(stderr,"Could not parse --rf %s\n", optarg); return 1; }
            break;
        case  2 :
            mplp.rflag_filter = bam_str2flag(optarg);
            if ( mplp.rflag_filter<0 ) { fprintf(stderr,"Could not parse --ff %s\n", optarg); return 1; }
            break;
        case  3 : mplp.output_fname = optarg; break;
        case  4 : mplp.openQ = atoi(optarg); break;
        case 'f':
            mplp.fai = fai_load(optarg);
            if (mplp.fai == 0) return 1;
            mplp.fai_fname = optarg;
            break;
        case 'd': mplp.max_depth = atoi(optarg); break;
        case 'r': mplp.reg = strdup(optarg); break;
        case 'l':
                  // In the original version the whole BAM was streamed which is inefficient
                  //  with few BED intervals and big BAMs. Todo: devise a heuristic to determine
                  //  best strategy, that is streaming or jumping.
                  mplp.bed = bed_read(optarg);
                  if (!mplp.bed) { print_error_errno("Could not read file \"%s\"", optarg); return 1; }
                  break;
        case 'P': mplp.pl_list = strdup(optarg); break;
        case 'p': mplp.flag |= MPLP_PER_SAMPLE; break;
        case 'g': mplp.flag |= MPLP_BCF; break;
        case 'v': mplp.flag |= MPLP_BCF | MPLP_VCF; break;
        case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_BCF; break;
        case 'B': mplp.flag &= ~MPLP_REALN; break;
        case 'D': mplp.fmt_flag |= B2B_FMT_DP; fprintf(stderr, "[warning] samtools mpileup option `-D` is functional, but deprecated. Please switch to `-t DP` in future.\n"); break;
        case 'S': mplp.fmt_flag |= B2B_FMT_SP; fprintf(stderr, "[warning] samtools mpileup option `-S` is functional, but deprecated. Please switch to `-t SP` in future.\n"); break;
        case 'V': mplp.fmt_flag |= B2B_FMT_DV; fprintf(stderr, "[warning] samtools mpileup option `-V` is functional, but deprecated. Please switch to `-t DV` in future.\n"); break;
        case 'I': mplp.flag |= MPLP_NO_INDEL; break;
        case 'E': mplp.flag |= MPLP_REDO_BAQ; break;
        case '6': mplp.flag |= MPLP_ILLUMINA13; break;
        case 'R': mplp.flag |= MPLP_IGNORE_RG; break;
        case 's': mplp.flag |= MPLP_PRINT_MAPQ; break;
        case 'O': mplp.flag |= MPLP_PRINT_POS; break;
        case 'C': mplp.capQ_thres = atoi(optarg); break;
        case 'q': mplp.min_mq = atoi(optarg); break;
        case 'Q': mplp.min_baseQ = atoi(optarg); break;
        case 'b': file_list = optarg; break;
        case 'o': {
                char *end;
                long value = strtol(optarg, &end, 10);
                // Distinguish between -o INT and -o FILE (a bit of a hack!)
                if (*end == '\0') mplp.openQ = value;
                else mplp.output_fname = optarg;
            }
            break;
        case 'e': mplp.extQ = atoi(optarg); break;
        case 'h': mplp.tandemQ = atoi(optarg); break;
        case 'A': use_orphan = 1; break;
        case 'F': mplp.min_frac = atof(optarg); break;
        case 'm': mplp.min_support = atoi(optarg); break;
        case 'L': mplp.max_indel_depth = atoi(optarg); break;
        case 'G': {
                FILE *fp_rg;
                char buf[1024];
                mplp.rghash = khash_str2int_init();
                if ((fp_rg = fopen(optarg, "r")) == 0)
                    fprintf(stderr, "(%s) Fail to open file %s. Continue anyway.\n", __func__, optarg);
                while (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but forgive me...
                    khash_str2int_inc(mplp.rghash, strdup(buf));
                fclose(fp_rg);
            }
            break;
        case 't': mplp.fmt_flag |= parse_format_flag(optarg); break;
        default:
            fprintf(stderr,"Invalid option: '%c'\n", c);
            return 1;
        }
    }
    if ( !(mplp.flag&MPLP_REALN) && mplp.flag&MPLP_REDO_BAQ )
    {
        fprintf(stderr,"Error: The -B option cannot be combined with -E\n");
        return 1;
    }
    if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN;
    if (argc == 1)
    {
        print_usage(stderr, &mplp);
        return 1;
    }
    int ret;
    if (file_list) {
        if ( read_file_list(file_list,&nfiles,&fn) ) return 1;
        ret = mpileup(&mplp,nfiles,fn);
        for (c=0; c<nfiles; c++) free(fn[c]);
        free(fn);
    }
    else
        ret = mpileup(&mplp, argc - optind, argv + optind);
    if (mplp.rghash) khash_str2int_destroy_free(mplp.rghash);
    free(mplp.reg); free(mplp.pl_list);
    if (mplp.fai) fai_destroy(mplp.fai);
    if (mplp.bed) bed_destroy(mplp.bed);
    return ret;
}
Exemple #3
0
int bam_mpileup(int argc, char *argv[])
{
    int c;
    const char *file_list = NULL;
    char **fn = NULL;
    int nfiles = 0, use_orphan = 0, noref = 0;
    mplp_conf_t mplp;
    memset(&mplp, 0, sizeof(mplp_conf_t));
    mplp.min_baseQ = 13;
    mplp.capQ_thres = 0;
    mplp.max_depth = 250; mplp.max_indel_depth = 250;
    mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100;
    mplp.min_frac = 0.002; mplp.min_support = 1;
    mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_SMART_OVERLAPS;
    mplp.argc = argc; mplp.argv = argv;
    mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP;
    mplp.output_fname = NULL;
    mplp.output_type = FT_VCF;
    mplp.record_cmd_line = 1;
    mplp.n_threads = 0;
    mplp.bsmpl = bam_smpl_init();

    static const struct option lopts[] =
    {
        {"rf", required_argument, NULL, 1},   // require flag
        {"ff", required_argument, NULL, 2},   // filter flag
        {"incl-flags", required_argument, NULL, 1},
        {"excl-flags", required_argument, NULL, 2},
        {"output", required_argument, NULL, 3},
        {"open-prob", required_argument, NULL, 4},
        {"ignore-RG", no_argument, NULL, 5},
        {"ignore-rg", no_argument, NULL, 5},
        {"gvcf", required_argument, NULL, 'g'},
        {"no-reference", no_argument, NULL, 7},
        {"no-version", no_argument, NULL, 8},
        {"threads",required_argument,NULL,9},
        {"illumina1.3+", no_argument, NULL, '6'},
        {"count-orphans", no_argument, NULL, 'A'},
        {"bam-list", required_argument, NULL, 'b'},
        {"no-BAQ", no_argument, NULL, 'B'},
        {"no-baq", no_argument, NULL, 'B'},
        {"adjust-MQ", required_argument, NULL, 'C'},
        {"adjust-mq", required_argument, NULL, 'C'},
        {"max-depth", required_argument, NULL, 'd'},
        {"redo-BAQ", no_argument, NULL, 'E'},
        {"redo-baq", no_argument, NULL, 'E'},
        {"fasta-ref", required_argument, NULL, 'f'},
        {"read-groups", required_argument, NULL, 'G'},
        {"region", required_argument, NULL, 'r'},
        {"regions", required_argument, NULL, 'r'},
        {"regions-file", required_argument, NULL, 'R'},
        {"targets", required_argument, NULL, 't'},
        {"targets-file", required_argument, NULL, 'T'},
        {"min-MQ", required_argument, NULL, 'q'},
        {"min-mq", required_argument, NULL, 'q'},
        {"min-BQ", required_argument, NULL, 'Q'},
        {"min-bq", required_argument, NULL, 'Q'},
        {"ignore-overlaps", no_argument, NULL, 'x'},
        {"output-type", required_argument, NULL, 'O'},
        {"samples", required_argument, NULL, 's'},
        {"samples-file", required_argument, NULL, 'S'},
        {"annotate", required_argument, NULL, 'a'},
        {"ext-prob", required_argument, NULL, 'e'},
        {"gap-frac", required_argument, NULL, 'F'},
        {"tandem-qual", required_argument, NULL, 'h'},
        {"skip-indels", no_argument, NULL, 'I'},
        {"max-idepth", required_argument, NULL, 'L'},
        {"min-ireads ", required_argument, NULL, 'm'},
        {"per-sample-mF", no_argument, NULL, 'p'},
        {"per-sample-mf", no_argument, NULL, 'p'},
        {"platforms", required_argument, NULL, 'P'},
        {NULL, 0, NULL, 0}
    };
    while ((c = getopt_long(argc, argv, "Ag:f:r:R:q:Q:C:Bd:L:b:P:po:e:h:Im:F:EG:6O:xa:s:S:t:T:",lopts,NULL)) >= 0) {
        switch (c) {
        case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break;
        case  1 :
            mplp.rflag_require = bam_str2flag(optarg);
            if ( mplp.rflag_require<0 ) { fprintf(stderr,"Could not parse --rf %s\n", optarg); return 1; }
            break;
        case  2 :
            mplp.rflag_filter = bam_str2flag(optarg);
            if ( mplp.rflag_filter<0 ) { fprintf(stderr,"Could not parse --ff %s\n", optarg); return 1; }
            break;
        case  3 : mplp.output_fname = optarg; break;
        case  4 : mplp.openQ = atoi(optarg); break;
        case  5 : bam_smpl_ignore_readgroups(mplp.bsmpl); break;
        case 'g':
            mplp.gvcf = gvcf_init(optarg);
            if ( !mplp.gvcf ) error("Could not parse: --gvcf %s\n", optarg);
            break;
        case 'f':
            mplp.fai = fai_load(optarg);
            if (mplp.fai == NULL) return 1;
            mplp.fai_fname = optarg;
            break;
        case  7 : noref = 1; break;
        case  8 : mplp.record_cmd_line = 0; break;
        case  9 : mplp.n_threads = strtol(optarg, 0, 0); break;
        case 'd': mplp.max_depth = atoi(optarg); break;
        case 'r': mplp.reg_fname = strdup(optarg); break;
        case 'R': mplp.reg_fname = strdup(optarg); mplp.reg_is_file = 1; break;
        case 't':
                  // In the original version the whole BAM was streamed which is inefficient
                  //  with few BED intervals and big BAMs. Todo: devise a heuristic to determine
                  //  best strategy, that is streaming or jumping.
                  if ( optarg[0]=='^' ) optarg++;
                  else mplp.bed_logic = 1;
                  mplp.bed = regidx_init(NULL,regidx_parse_reg,NULL,0,NULL);
                  mplp.bed_itr = regitr_init(mplp.bed);
                  if ( regidx_insert_list(mplp.bed,optarg,',') !=0 )
                  {
                      fprintf(stderr,"Could not parse the targets: %s\n", optarg);
                      exit(EXIT_FAILURE);
                  }
                  break;
        case 'T':
                  if ( optarg[0]=='^' ) optarg++;
                  else mplp.bed_logic = 1;
                  mplp.bed = regidx_init(optarg,NULL,NULL,0,NULL);
                  if (!mplp.bed) { fprintf(stderr, "bcftools mpileup: Could not read file \"%s\"", optarg); return 1; }
                  break;
        case 'P': mplp.pl_list = strdup(optarg); break;
        case 'p': mplp.flag |= MPLP_PER_SAMPLE; break;
        case 'B': mplp.flag &= ~MPLP_REALN; break;
        case 'I': mplp.flag |= MPLP_NO_INDEL; break;
        case 'E': mplp.flag |= MPLP_REDO_BAQ; break;
        case '6': mplp.flag |= MPLP_ILLUMINA13; break;
        case 's': if ( bam_smpl_add_samples(mplp.bsmpl,optarg,0)<0 ) error("Could not read samples: %s\n",optarg); break;
        case 'S': if ( bam_smpl_add_samples(mplp.bsmpl,optarg,1)<0 ) error("Could not read samples: %s\n",optarg); break;
        case 'O': 
            switch (optarg[0]) {
                case 'b': mplp.output_type = FT_BCF_GZ; break;
                case 'u': mplp.output_type = FT_BCF; break;
                case 'z': mplp.output_type = FT_VCF_GZ; break;
                case 'v': mplp.output_type = FT_VCF; break;
                default: error("[error] The option \"-O\" changed meaning when mpileup moved to bcftools. Did you mean: \"bcftools mpileup --output-type\" or \"samtools mpileup --output-BP\"?\n"); 
            }
            break;
        case 'C': mplp.capQ_thres = atoi(optarg); break;
        case 'q': mplp.min_mq = atoi(optarg); break;
        case 'Q': mplp.min_baseQ = atoi(optarg); break;
        case 'b': file_list = optarg; break;
        case 'o': {
                char *end;
                long value = strtol(optarg, &end, 10);
                // Distinguish between -o INT and -o FILE (a bit of a hack!)
                if (*end == '\0') mplp.openQ = value;
                else mplp.output_fname = optarg;
            }
            break;
        case 'e': mplp.extQ = atoi(optarg); break;
        case 'h': mplp.tandemQ = atoi(optarg); break;
        case 'A': use_orphan = 1; break;
        case 'F': mplp.min_frac = atof(optarg); break;
        case 'm': mplp.min_support = atoi(optarg); break;
        case 'L': mplp.max_indel_depth = atoi(optarg); break;
        case 'G': bam_smpl_add_readgroups(mplp.bsmpl, optarg, 1); break;
        case 'a':
            if (optarg[0]=='?') {
                list_annotations(stderr);
                return 1;
            }
            mplp.fmt_flag |= parse_format_flag(optarg);
        break;
        default:
            fprintf(stderr,"Invalid option: '%c'\n", c);
            return 1;
        }
    }

    if ( mplp.gvcf && !(mplp.fmt_flag&B2B_FMT_DP) )
    {
        fprintf(stderr,"[warning] The -t DP option is required with --gvcf, switching on.\n");
        mplp.fmt_flag |= B2B_FMT_DP;
    }
    if ( mplp.flag&(MPLP_BCF|MPLP_VCF|MPLP_NO_COMP) )
    {
        if ( mplp.flag&MPLP_VCF )
        {
            if ( mplp.flag&MPLP_NO_COMP ) mplp.output_type = FT_VCF;
            else mplp.output_type = FT_VCF_GZ;
        }
        else if ( mplp.flag&MPLP_BCF )
        {
            if ( mplp.flag&MPLP_NO_COMP ) mplp.output_type = FT_BCF;
            else mplp.output_type = FT_BCF_GZ;
        }
    }
    if ( !(mplp.flag&MPLP_REALN) && mplp.flag&MPLP_REDO_BAQ )
    {
        fprintf(stderr,"Error: The -B option cannot be combined with -E\n");
        return 1;
    }
    if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN;
    if (argc == 1)
    {
        print_usage(stderr, &mplp);
        return 1;
    }
    if (!mplp.fai && !noref) {
        fprintf(stderr,"Error: mpileup requires the --fasta-ref option by default; use --no-reference to run without a fasta reference\n");
        return 1;
    }
    int ret,i;
    if (file_list) 
    {
        if ( read_file_list(file_list,&nfiles,&fn) ) return 1;
        mplp.files  = fn;
        mplp.nfiles = nfiles;
    }
    else
    {
        mplp.nfiles = argc - optind;
        mplp.files  = (char**) malloc(mplp.nfiles*sizeof(char*));
        for (i=0; i<mplp.nfiles; i++) mplp.files[i] = strdup(argv[optind+i]);
    }
    ret = mpileup(&mplp);

    for (i=0; i<mplp.nfiles; i++) free(mplp.files[i]);
    free(mplp.files);
    free(mplp.reg_fname); free(mplp.pl_list);
    if (mplp.fai) fai_destroy(mplp.fai);
    if (mplp.bed) regidx_destroy(mplp.bed);
    if (mplp.bed_itr) regitr_destroy(mplp.bed_itr);
    if (mplp.reg) regidx_destroy(mplp.reg);
    bam_smpl_destroy(mplp.bsmpl);
    return ret;
}
Exemple #4
0
int
main_uniq(int argc, char *argv[])
{
     int c, i;
     char *bam_file = NULL;
     char *vcf_in = NULL; /* - == stdout */
     char *vcf_out = NULL; /* - == stdout */
     mplp_conf_t mplp_conf;
     uniq_conf_t uniq_conf;
     void (*plp_proc_func)(const plp_col_t*, void*);
     int rc = 0;
     var_t **vars = NULL;
     int num_vars = 0;
     char *vcf_header = NULL;
     static int use_det_lim = 0;
     static int use_orphan = 0;
     static int output_all = 0;
     static int is_somatic = 0;

     /* default uniq options */
     memset(&uniq_conf, 0, sizeof(uniq_conf_t));
     uniq_conf.uni_freq = DEFAULT_UNI_FREQ;
     uniq_conf.use_det_lim = 0;

     uniq_conf.uniq_filter.mtc_type = MTC_FDR;
     uniq_conf.uniq_filter.alpha = 0.001;

     /* default pileup options */
     memset(&mplp_conf, 0, sizeof(mplp_conf_t));
     mplp_conf.max_mq = DEFAULT_MAX_MQ;
     mplp_conf.min_mq = 1;
     mplp_conf.min_plp_bq = DEFAULT_MIN_PLP_BQ;
     mplp_conf.max_depth = DEFAULT_MAX_PLP_DEPTH;
     mplp_conf.flag = MPLP_NO_ORPHAN;


    /* keep in sync with long_opts_str and usage
     *
     * getopt is a pain in the whole when it comes to syncing of long
     * and short args and usage. check out gopt, libcfu...
     */
    while (1) {
         static struct option long_opts[] = {
              /* see usage sync */
              {"help", no_argument, NULL, 'h'},
              {"verbose", no_argument, &verbose, 1},
              {"debug", no_argument, &debug, 1},
              {"use-det-lim", no_argument, &use_det_lim, 1},
              {"use-orphan", no_argument, &use_orphan, 1},
              {"output-all", no_argument, &output_all, 1},
              {"is-somatic", no_argument, &is_somatic, 1},

              {"vcf-in", required_argument, NULL, 'v'},
              {"vcf-out", required_argument, NULL, 'o'},

              {"uni-freq", required_argument, NULL, 'f'},

              {"uniq-thresh", required_argument, NULL, 't'},
              {"uniq-mtc", required_argument, NULL, 'm'},
              {"uniq-alpha", required_argument, NULL, 'a'},
              {"uniq-ntests", required_argument, NULL, 'n'},

              {0, 0, 0, 0} /* sentinel */
         };

         /* keep in sync with long_opts and usage */
         static const char *long_opts_str = "hv:o:f:t:m:a:n:";

         /* getopt_long stores the option index here. */
         int long_opts_index = 0;
         c = getopt_long(argc-1, argv+1, /* skipping 'lofreq', just leaving 'command', i.e. call */
                         long_opts_str, long_opts, & long_opts_index);
         if (c == -1) {
              break;
         }

         switch (c) {
         /* keep in sync with long_opts etc */
         case 'h':
              usage(& uniq_conf);
              return 0;

         case 'v':
              if (0 != strcmp(optarg, "-")) {
                   if (! file_exists(optarg)) {
                        LOG_FATAL("Input file '%s' does not exist. Exiting...\n", optarg);
                        return 1;
                   }
              }
              vcf_in = strdup(optarg);
              break;

         case 'o':
              if (0 != strcmp(optarg, "-")) {
                   if (file_exists(optarg)) {
                        LOG_FATAL("Cowardly refusing to overwrite file '%s'. Exiting...\n", optarg);
                        return 1;
                   }
              }
              vcf_out = strdup(optarg);
              break;

         case 'f':
              uniq_conf.uni_freq = strtof(optarg, (char **)NULL); /* atof */
              if (uniq_conf.uni_freq<=0) {
                   LOG_WARN("%s\n", "Ignoring uni-freq option");
              }
              if (uniq_conf.uni_freq>1.0) {
                   LOG_FATAL("%s\n", "Value for uni-freq has to be <1.0");
                   return 1;
              }
              break;

         case 't':
              if (! isdigit(optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              uniq_conf.uniq_filter.thresh = atoi(optarg);
              uniq_conf.uniq_filter.mtc_type = MTC_NONE;
              break;
         case 'm':
              uniq_conf.uniq_filter.mtc_type = mtc_str_to_type(optarg);
              if (-1 == uniq_conf.uniq_filter.mtc_type) {
                   LOG_FATAL("Unknown multiple testing correction type '%s' for snv quality filtering\n", optarg);
                   return -1;
              }
              break;
         case 'a':
              if (! isdigit(optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              uniq_conf.uniq_filter.alpha = strtof(optarg, NULL);
              break;
         case 'n':
              if (! isdigit(optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              uniq_conf.uniq_filter.ntests = atol(optarg);
              break;

         case '?':
              LOG_FATAL("%s\n", "unrecognized arguments found. Exiting...\n");
              return 1;
         default:
              break;
         }
    }
    if (use_orphan) {
         mplp_conf.flag &= ~MPLP_NO_ORPHAN;
    }
    if (debug) {
         dump_mplp_conf(& mplp_conf, stderr);
    }
    uniq_conf.output_all = output_all;
    uniq_conf.use_det_lim = use_det_lim;


#if DEBUG
    LOG_DEBUG("uniq_conf.uniq_filter.thresh = %d\n", uniq_conf.uniq_filter.thresh);
    LOG_DEBUG("uniq_conf.uniq_filter.mtc_type = %d\n", uniq_conf.uniq_filter.mtc_type);
    LOG_DEBUG("uniq_conf.uniq_filter.alpha = %f\n", uniq_conf.uniq_filter.alpha);
    LOG_DEBUG("uniq_conf.uniq_filter.ntests = %d\n", uniq_conf.uniq_filter.ntests);
#endif
    
    if (uniq_conf.uniq_filter.thresh && uniq_conf.uniq_filter.mtc_type != MTC_NONE) {
         LOG_FATAL("%s\n", "Can't use fixed Unique quality threshold *and* multiple testing correction.");
         return 1;
    }

    if (argc == 2) {
        fprintf(stderr, "\n");
        usage(& uniq_conf);
        return 1;
    }

    if (1 != argc - optind - 1) {
        fprintf(stderr, "Need exactly one BAM file as last argument\n");
        return 1;
    }
    bam_file = (argv + optind + 1)[0];
    if (! file_exists(bam_file)) {
         LOG_FATAL("BAM file %s does not exist. Exiting...\n", bam_file);
         return -1;
    }


    if (! vcf_in) {
#if 0
         vcf_in = malloc(2 * sizeof(char));
         strcpy(vcf_in, "-");
#else
         LOG_FATAL("%s\n", "No input vcf specified. Exiting...");
         return -1;
#endif
    }
    if (! vcf_out) {
         vcf_out = malloc(2 * sizeof(char));
         strcpy(vcf_out, "-");
    }

    if (vcf_file_open(& uniq_conf.vcf_in, vcf_in,
                      HAS_GZIP_EXT(vcf_in), 'r')) {
         LOG_ERROR("Couldn't open %s\n", vcf_in);
         return 1;
    }

    if (vcf_file_open(& uniq_conf.vcf_out, vcf_out,
                      HAS_GZIP_EXT(vcf_out), 'w')) {
         LOG_ERROR("Couldn't open %s\n", vcf_out);
         return 1;
    }

    if (0 != vcf_parse_header(&vcf_header, & uniq_conf.vcf_in)) {
         LOG_WARN("%s\n", "vcf_parse_header() failed. trying to rewind to start...");
         if (vcf_file_seek(& uniq_conf.vcf_in, 0, SEEK_SET)) {
              LOG_FATAL("%s\n", "Couldn't rewind file to parse variants"
                        " after header parsing failed");
              return 1;
         }
    } else {
         vcf_header_add(&vcf_header, "##INFO=<ID=UNIQ,Number=0,Type=Flag,Description=\"Unique, i.e. not detectable in paired sample\">\n");
         vcf_header_add(&vcf_header, "##INFO=<ID=UQ,Number=1,Type=Integer,Description=\"Phred-scaled uniq score at this position\">\n");
         if (is_somatic) {
              vcf_header_add(&vcf_header, "##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description=\"Somatic event\">\n");
         }
         if (! uniq_conf.use_det_lim) {
              char full_filter_str[FILTER_STRSIZE];
              if (uniq_conf.uniq_filter.thresh > 0) {
                   snprintf(uniq_conf.uniq_filter.id, FILTER_ID_STRSIZE, "min_uq_%d", uniq_conf.uniq_filter.thresh);
                   snprintf(full_filter_str, FILTER_STRSIZE,
                            "##FILTER=<ID=%s,Description=\"Minimum Uniq Phred %d\">\n",
                            uniq_conf.uniq_filter.id, uniq_conf.uniq_filter.thresh);
                   vcf_header_add(&vcf_header, full_filter_str);
                   
              } else if (uniq_conf.uniq_filter.mtc_type != MTC_NONE) {
                   char buf[64];
                   mtc_str(buf, uniq_conf.uniq_filter.mtc_type);
                   snprintf(uniq_conf.uniq_filter.id, FILTER_ID_STRSIZE, "uq_%s", buf);
                   snprintf(full_filter_str, FILTER_STRSIZE,
                            "##FILTER=<ID=%s,Description=\"Uniq Multiple Testing Correction: %s corr. pvalue < %f\">\n",
                            uniq_conf.uniq_filter.id, buf, uniq_conf.uniq_filter.alpha);
                   vcf_header_add(& vcf_header, full_filter_str);
              }
         }

         vcf_write_header(& uniq_conf.vcf_out, vcf_header);
         free(vcf_header);
    }

    num_vars = vcf_parse_vars(&vars, & uniq_conf.vcf_in, 1);
    if (0 == num_vars) {
         LOG_WARN("%s\n", "Didn't find any variants in input");
         goto clean_and_exit;
    }
    if (! uniq_conf.uniq_filter.ntests) {
         uniq_conf.uniq_filter.ntests = num_vars;
    }

    plp_proc_func = &uniq_snv;

    for (i=0; i<num_vars; i++) {
         char reg_buf[BUF_SIZE];
         if (i%100==0) {
              LOG_VERBOSE("Processing variant %d of %d\n", i+1, num_vars);
         }
         uniq_conf.var = vars[i];

         snprintf(reg_buf, BUF_SIZE, "%s:%ld-%ld",
                  vars[i]->chrom, vars[i]->pos+1, vars[i]->pos+1);
         mplp_conf.reg = strdup(reg_buf);

         LOG_DEBUG("pileup for var no %d at %s %d\n",
                   i+1, uniq_conf.var->chrom, uniq_conf.var->pos+1);
#ifdef DISABLE_INDELS
         if (vcf_var_has_info_key(NULL, uniq_conf.var, "INDEL")) {
              LOG_WARN("Skipping indel var at %s %d\n",
                       uniq_conf.var->chrom, uniq_conf.var->pos+1);
              free(mplp_conf.reg);
              mplp_conf.reg = NULL;
              continue;
         }
#endif
         /* no need to check for filter because done by parse_vars */

         rc = mpileup(&mplp_conf, plp_proc_func, (void*)&uniq_conf,
                      1, (const char **) argv + optind + 1);

         if (uniq_conf.uniq_filter.thresh) {
              apply_uniq_threshold(uniq_conf.var, & uniq_conf.uniq_filter);
         }

         free(mplp_conf.reg);
         mplp_conf.reg = NULL;
    }
    uniq_conf.var = NULL;/* just be sure to not use it accidentally again */


    /* print whatever we've got. there's no UQ to test or we
     * are supposed to print all 
     */
    if (uniq_conf.use_det_lim) {
         for (i=0; i<num_vars; i++) {
              var_t *var = vars[i];
              vcf_write_var(& uniq_conf.vcf_out, var);
         }
         /* all done */
         goto clean_and_exit;
    }



    if (uniq_conf.uniq_filter.mtc_type != MTC_NONE) {
         if (apply_uniq_filter_mtc(& uniq_conf.uniq_filter, vars, num_vars)) {
              LOG_FATAL("%s\n", "Multiple testing correction on uniq pvalues failed");
              return -1;
         }
    }
    
    for (i=0; i<num_vars; i++) {
         var_t *var = vars[i];
         if (VCF_VAR_PASSES(var) || uniq_conf.output_all) {
              vcf_write_var(& uniq_conf.vcf_out, var);
         }
    }

clean_and_exit:

    vcf_file_close(& uniq_conf.vcf_in);
    vcf_file_close(& uniq_conf.vcf_out);

    for (i=0; i<num_vars; i++) {
         vcf_free_var(& vars[i]);
    }
    free(vars);

    free(vcf_in);
    free(vcf_out);

    if (0==rc) {
         LOG_VERBOSE("%s\n", "Successful exit.");
    }
    /* LOG_FIXME("%s\n", "allow user setting of -S and -J. Currently just using default") */

    return rc;
}