void into_menu() { char imagename[16]; switch(main_menu_point) { case 0: work = BOOK_MENU; workdircase = ONLY_FILE; strcpy(workdirname, "book"); strcpy(imagename, "sys/book.api"); break; case 1: work = MUSIC_MENU; workdircase = ONLY_FILE; strcpy(workdirname, "/"); strcpy(imagename, "sys/music.api"); break; case 2: work = IMAGE_MENU; workdircase = ONLY_FILE; strcpy(workdirname, "image"); strcpy(imagename, "sys/image.api"); break; case 3: work = GAME_MENU; workdircase = ONLY_DIR; strcpy(workdirname, "game"); strcpy(imagename, "sys/game.api"); break; case 4: work = FILE_MENU; workdircase = BOTH_FILE_DIR; strcpy(workdirname, "/"); strcpy(imagename, "sys/file.api"); break; case 5: into_about(); return; } myGLCD.fillScr(255, 255, 255); show_apimg(23, 23, imagename); //显示图标 file_amount = get_file_amount(workdirname, workdircase); //文件数量 file_offset = 0; read_file_list(workdirname, file_offset, workdircase); //载入文件列表 if(file_amount) { show_file_menu(); //显示文件列表 file_list_point = 0; myGLCD.setColor(0, 0, 255); myGLCD.drawRect(210, 2, 218, 173); draw_file_list_point(file_list_point, 255, 0, 0); //光标 } else show_chinese_sentence(111, 4, "\xCE\xDE\xCE\xC4\xBC\xFE", 0, 0, 0, 1); //无文件 return; }
int bam_mpileup(int argc, char *argv[]) { int c; const char *file_list = NULL; char **fn = NULL; int nfiles = 0, use_orphan = 0; mplp_conf_t mplp; memset(&mplp, 0, sizeof(mplp_conf_t)); #define MPLP_PRINT_POS 0x4000 mplp.max_mq = 60; mplp.min_baseQ = 13; mplp.capQ_thres = 0; mplp.max_depth = 250; mplp.max_indel_depth = 250; mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100; mplp.min_frac = 0.002; mplp.min_support = 1; mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN; while ((c = getopt(argc, argv, "Agf:r:l:M:q:Q:uaRC:BDSd:L:b:P:o:e:h:Im:F:EG:6Os")) >= 0) { switch (c) { case 'f': mplp.fai = fai_load(optarg); if (mplp.fai == 0) return 1; break; case 'd': mplp.max_depth = atoi(optarg); break; case 'r': mplp.reg = strdup(optarg); break; case 'l': mplp.bed = bed_read(optarg); break; case 'P': mplp.pl_list = strdup(optarg); break; case 'g': mplp.flag |= MPLP_GLF; break; case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_GLF; break; case 'a': mplp.flag |= MPLP_NO_ORPHAN | MPLP_REALN; break; case 'B': mplp.flag &= ~MPLP_REALN; break; case 'D': mplp.flag |= MPLP_FMT_DP; break; case 'S': mplp.flag |= MPLP_FMT_SP; break; case 'I': mplp.flag |= MPLP_NO_INDEL; break; case 'E': mplp.flag |= MPLP_EXT_BAQ; break; case '6': mplp.flag |= MPLP_ILLUMINA13; break; case 'R': mplp.flag |= MPLP_IGNORE_RG; break; case 's': mplp.flag |= MPLP_PRINT_MAPQ; break; case 'O': mplp.flag |= MPLP_PRINT_POS; break; case 'C': mplp.capQ_thres = atoi(optarg); break; case 'M': mplp.max_mq = atoi(optarg); break; case 'q': mplp.min_mq = atoi(optarg); break; case 'Q': mplp.min_baseQ = atoi(optarg); break; case 'b': file_list = optarg; break; case 'o': mplp.openQ = atoi(optarg); break; case 'e': mplp.extQ = atoi(optarg); break; case 'h': mplp.tandemQ = atoi(optarg); break; case 'A': use_orphan = 1; break; case 'F': mplp.min_frac = atof(optarg); break; case 'm': mplp.min_support = atoi(optarg); break; case 'L': mplp.max_indel_depth = atoi(optarg); break; case 'G': { FILE *fp_rg; char buf[1024]; mplp.rghash = bcf_str2id_init(); if ((fp_rg = fopen(optarg, "r")) == 0) fprintf(stderr, "(%s) Fail to open file %s. Continue anyway.\n", __func__, optarg); while (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but forgive me... bcf_str2id_add(mplp.rghash, strdup(buf)); fclose(fp_rg); } break; } } if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN; if (argc == 1) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: samtools mpileup [options] in1.bam [in2.bam [...]]\n\n"); fprintf(stderr, "Input options:\n\n"); fprintf(stderr, " -6 assume the quality is in the Illumina-1.3+ encoding\n"); fprintf(stderr, " -A count anomalous read pairs\n"); fprintf(stderr, " -B disable BAQ computation\n"); fprintf(stderr, " -b FILE list of input BAM files [null]\n"); fprintf(stderr, " -C INT parameter for adjusting mapQ; 0 to disable [0]\n"); fprintf(stderr, " -d INT max per-BAM depth to avoid excessive memory usage [%d]\n", mplp.max_depth); fprintf(stderr, " -E extended BAQ for higher sensitivity but lower specificity\n"); fprintf(stderr, " -f FILE faidx indexed reference sequence file [null]\n"); fprintf(stderr, " -G FILE exclude read groups listed in FILE [null]\n"); fprintf(stderr, " -l FILE list of positions (chr pos) or regions (BED) [null]\n"); fprintf(stderr, " -M INT cap mapping quality at INT [%d]\n", mplp.max_mq); fprintf(stderr, " -r STR region in which pileup is generated [null]\n"); fprintf(stderr, " -R ignore RG tags\n"); fprintf(stderr, " -q INT skip alignments with mapQ smaller than INT [%d]\n", mplp.min_mq); fprintf(stderr, " -Q INT skip bases with baseQ/BAQ smaller than INT [%d]\n", mplp.min_baseQ); fprintf(stderr, "\nOutput options:\n\n"); fprintf(stderr, " -D output per-sample DP in BCF (require -g/-u)\n"); fprintf(stderr, " -g generate BCF output (genotype likelihoods)\n"); fprintf(stderr, " -O output base positions on reads (disabled by -g/-u)\n"); fprintf(stderr, " -s output mapping quality (disabled by -g/-u)\n"); fprintf(stderr, " -S output per-sample strand bias P-value in BCF (require -g/-u)\n"); fprintf(stderr, " -u generate uncompress BCF output\n"); fprintf(stderr, "\nSNP/INDEL genotype likelihoods options (effective with `-g' or `-u'):\n\n"); fprintf(stderr, " -e INT Phred-scaled gap extension seq error probability [%d]\n", mplp.extQ); fprintf(stderr, " -F FLOAT minimum fraction of gapped reads for candidates [%g]\n", mplp.min_frac); fprintf(stderr, " -h INT coefficient for homopolymer errors [%d]\n", mplp.tandemQ); fprintf(stderr, " -I do not perform indel calling\n"); fprintf(stderr, " -L INT max per-sample depth for INDEL calling [%d]\n", mplp.max_indel_depth); fprintf(stderr, " -m INT minimum gapped reads for indel candidates [%d]\n", mplp.min_support); fprintf(stderr, " -o INT Phred-scaled gap open sequencing error probability [%d]\n", mplp.openQ); fprintf(stderr, " -P STR comma separated list of platforms for indels [all]\n"); fprintf(stderr, "\n"); fprintf(stderr, "Notes: Assuming diploid individuals.\n\n"); return 1; } if (file_list) { if ( read_file_list(file_list,&nfiles,&fn) ) return 1; mpileup(&mplp,nfiles,fn); for (c=0; c<nfiles; c++) free(fn[c]); free(fn); } else mpileup(&mplp, argc - optind, argv + optind); if (mplp.rghash) bcf_str2id_thorough_destroy(mplp.rghash); free(mplp.reg); free(mplp.pl_list); if (mplp.fai) fai_destroy(mplp.fai); if (mplp.bed) bed_destroy(mplp.bed); return 0; }
int main_depth(int argc, char *argv[]) { int i, n, tid, reg_tid, beg, end, pos, *n_plp, baseQ = 0, mapQ = 0, min_len = 0; int all = 0, status = EXIT_SUCCESS, nfiles, max_depth = -1; const bam_pileup1_t **plp; char *reg = 0; // specified region void *bed = 0; // BED data structure char *file_list = NULL, **fn = NULL; bam_hdr_t *h = NULL; // BAM header of the 1st input aux_t **data; bam_mplp_t mplp; int last_pos = -1, last_tid = -1, ret; sam_global_args ga = SAM_GLOBAL_ARGS_INIT; static const struct option lopts[] = { SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0), { NULL, 0, NULL, 0 } }; // parse the command line while ((n = getopt_long(argc, argv, "r:b:q:Q:l:f:am:d:", lopts, NULL)) >= 0) { switch (n) { case 'l': min_len = atoi(optarg); break; // minimum query length case 'r': reg = strdup(optarg); break; // parsing a region requires a BAM header case 'b': bed = bed_read(optarg); // BED or position list file can be parsed now if (!bed) { print_error_errno("depth", "Could not read file \"%s\"", optarg); return 1; } break; case 'q': baseQ = atoi(optarg); break; // base quality threshold case 'Q': mapQ = atoi(optarg); break; // mapping quality threshold case 'f': file_list = optarg; break; case 'a': all++; break; case 'd': case 'm': max_depth = atoi(optarg); break; // maximum coverage depth default: if (parse_sam_global_opt(n, optarg, lopts, &ga) == 0) break; /* else fall-through */ case '?': return usage(); } } if (optind == argc && !file_list) return usage(); // initialize the auxiliary data structures if (file_list) { if ( read_file_list(file_list,&nfiles,&fn) ) return 1; n = nfiles; argv = fn; optind = 0; } else n = argc - optind; // the number of BAMs on the command line data = calloc(n, sizeof(aux_t*)); // data[i] for the i-th input reg_tid = 0; beg = 0; end = INT_MAX; // set the default region for (i = 0; i < n; ++i) { int rf; data[i] = calloc(1, sizeof(aux_t)); data[i]->fp = sam_open_format(argv[optind+i], "r", &ga.in); // open BAM if (data[i]->fp == NULL) { print_error_errno("depth", "Could not open \"%s\"", argv[optind+i]); status = EXIT_FAILURE; goto depth_end; } rf = SAM_FLAG | SAM_RNAME | SAM_POS | SAM_MAPQ | SAM_CIGAR | SAM_SEQ; if (baseQ) rf |= SAM_QUAL; if (hts_set_opt(data[i]->fp, CRAM_OPT_REQUIRED_FIELDS, rf)) { fprintf(stderr, "Failed to set CRAM_OPT_REQUIRED_FIELDS value\n"); return 1; } if (hts_set_opt(data[i]->fp, CRAM_OPT_DECODE_MD, 0)) { fprintf(stderr, "Failed to set CRAM_OPT_DECODE_MD value\n"); return 1; } data[i]->min_mapQ = mapQ; // set the mapQ filter data[i]->min_len = min_len; // set the qlen filter data[i]->hdr = sam_hdr_read(data[i]->fp); // read the BAM header if (data[i]->hdr == NULL) { fprintf(stderr, "Couldn't read header for \"%s\"\n", argv[optind+i]); status = EXIT_FAILURE; goto depth_end; } if (reg) { // if a region is specified hts_idx_t *idx = sam_index_load(data[i]->fp, argv[optind+i]); // load the index if (idx == NULL) { print_error("depth", "can't load index for \"%s\"", argv[optind+i]); status = EXIT_FAILURE; goto depth_end; } data[i]->iter = sam_itr_querys(idx, data[i]->hdr, reg); // set the iterator hts_idx_destroy(idx); // the index is not needed any more; free the memory if (data[i]->iter == NULL) { print_error("depth", "can't parse region \"%s\"", reg); status = EXIT_FAILURE; goto depth_end; } } } h = data[0]->hdr; // easy access to the header of the 1st BAM if (reg) { beg = data[0]->iter->beg; // and to the parsed region coordinates end = data[0]->iter->end; reg_tid = data[0]->iter->tid; } // the core multi-pileup loop mplp = bam_mplp_init(n, read_bam, (void**)data); // initialization if (0 < max_depth) bam_mplp_set_maxcnt(mplp,max_depth); // set maximum coverage depth n_plp = calloc(n, sizeof(int)); // n_plp[i] is the number of covering reads from the i-th BAM plp = calloc(n, sizeof(bam_pileup1_t*)); // plp[i] points to the array of covering reads (internal in mplp) while ((ret=bam_mplp_auto(mplp, &tid, &pos, n_plp, plp)) > 0) { // come to the next covered position if (pos < beg || pos >= end) continue; // out of range; skip if (tid >= h->n_targets) continue; // diff number of @SQ lines per file? if (all) { while (tid > last_tid) { if (last_tid >= 0 && !reg) { // Deal with remainder or entirety of last tid. while (++last_pos < h->target_len[last_tid]) { // Horribly inefficient, but the bed API is an obfuscated black box. if (bed && bed_overlap(bed, h->target_name[last_tid], last_pos, last_pos + 1) == 0) continue; fputs(h->target_name[last_tid], stdout); printf("\t%d", last_pos+1); for (i = 0; i < n; i++) putchar('\t'), putchar('0'); putchar('\n'); } } last_tid++; last_pos = -1; if (all < 2) break; } // Deal with missing portion of current tid while (++last_pos < pos) { if (last_pos < beg) continue; // out of range; skip if (bed && bed_overlap(bed, h->target_name[tid], last_pos, last_pos + 1) == 0) continue; fputs(h->target_name[tid], stdout); printf("\t%d", last_pos+1); for (i = 0; i < n; i++) putchar('\t'), putchar('0'); putchar('\n'); } last_tid = tid; last_pos = pos; } if (bed && bed_overlap(bed, h->target_name[tid], pos, pos + 1) == 0) continue; fputs(h->target_name[tid], stdout); printf("\t%d", pos+1); // a customized printf() would be faster for (i = 0; i < n; ++i) { // base level filters have to go here int j, m = 0; for (j = 0; j < n_plp[i]; ++j) { const bam_pileup1_t *p = plp[i] + j; // DON'T modfity plp[][] unless you really know if (p->is_del || p->is_refskip) ++m; // having dels or refskips at tid:pos else if (bam_get_qual(p->b)[p->qpos] < baseQ) ++m; // low base quality } printf("\t%d", n_plp[i] - m); // this the depth to output } putchar('\n'); } if (ret < 0) status = EXIT_FAILURE; free(n_plp); free(plp); bam_mplp_destroy(mplp); if (all) { // Handle terminating region if (last_tid < 0 && reg && all > 1) { last_tid = reg_tid; last_pos = beg-1; } while (last_tid >= 0 && last_tid < h->n_targets) { while (++last_pos < h->target_len[last_tid]) { if (last_pos >= end) break; if (bed && bed_overlap(bed, h->target_name[last_tid], last_pos, last_pos + 1) == 0) continue; fputs(h->target_name[last_tid], stdout); printf("\t%d", last_pos+1); for (i = 0; i < n; i++) putchar('\t'), putchar('0'); putchar('\n'); } last_tid++; last_pos = -1; if (all < 2 || reg) break; } } depth_end: for (i = 0; i < n && data[i]; ++i) { bam_hdr_destroy(data[i]->hdr); if (data[i]->fp) sam_close(data[i]->fp); hts_itr_destroy(data[i]->iter); free(data[i]); } free(data); free(reg); if (bed) bed_destroy(bed); if ( file_list ) { for (i=0; i<n; i++) free(fn[i]); free(fn); } sam_global_args_free(&ga); return status; }
int main_depth(int argc, char *argv[]) #endif { int i, n, tid, beg, end, pos, *n_plp, baseQ = 0, mapQ = 0, min_len = 0, nfiles; const bam_pileup1_t **plp; char *reg = 0; // specified region void *bed = 0; // BED data structure char *file_list = NULL, **fn = NULL; bam_header_t *h = 0; // BAM header of the 1st input aux_t **data; bam_mplp_t mplp; // parse the command line while ((n = getopt(argc, argv, "r:b:q:Q:l:f:")) >= 0) { switch (n) { case 'l': min_len = atoi(optarg); break; // minimum query length case 'r': reg = strdup(optarg); break; // parsing a region requires a BAM header case 'b': bed = bed_read(optarg); break; // BED or position list file can be parsed now case 'q': baseQ = atoi(optarg); break; // base quality threshold case 'Q': mapQ = atoi(optarg); break; // mapping quality threshold case 'f': file_list = optarg; break; } } if (optind == argc && !file_list) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: samtools depth [options] in1.bam [in2.bam [...]]\n"); fprintf(stderr, "Options:\n"); fprintf(stderr, " -b <bed> list of positions or regions\n"); fprintf(stderr, " -f <list> list of input BAM filenames, one per line [null]\n"); fprintf(stderr, " -l <int> minQLen\n"); fprintf(stderr, " -q <int> base quality threshold\n"); fprintf(stderr, " -Q <int> mapping quality threshold\n"); fprintf(stderr, " -r <chr:from-to> region\n"); fprintf(stderr, "\n"); return 1; } // initialize the auxiliary data structures if (file_list) { if ( read_file_list(file_list,&nfiles,&fn) ) return 1; n = nfiles; argv = fn; optind = 0; } else n = argc - optind; // the number of BAMs on the command line data = calloc(n, sizeof(void*)); // data[i] for the i-th input beg = 0; end = 1<<30; tid = -1; // set the default region for (i = 0; i < n; ++i) { bam_header_t *htmp; data[i] = calloc(1, sizeof(aux_t)); data[i]->fp = bam_open(argv[optind+i], "r"); // open BAM data[i]->min_mapQ = mapQ; // set the mapQ filter data[i]->min_len = min_len; // set the qlen filter htmp = bam_header_read(data[i]->fp); // read the BAM header if (i == 0) { h = htmp; // keep the header of the 1st BAM if (reg) bam_parse_region(h, reg, &tid, &beg, &end); // also parse the region } else bam_header_destroy(htmp); // if not the 1st BAM, trash the header if (tid >= 0) { // if a region is specified and parsed successfully bam_index_t *idx = bam_index_load(argv[optind+i]); // load the index data[i]->iter = bam_iter_query(idx, tid, beg, end); // set the iterator bam_index_destroy(idx); // the index is not needed any more; phase out of the memory } } // the core multi-pileup loop mplp = bam_mplp_init(n, read_bam, (void**)data); // initialization bam_mplp_set_maxcnt(mplp,2147483647); // set max_depth to int max n_plp = calloc(n, sizeof(int)); // n_plp[i] is the number of covering reads from the i-th BAM plp = calloc(n, sizeof(void*)); // plp[i] points to the array of covering reads (internal in mplp) while (bam_mplp_auto(mplp, &tid, &pos, n_plp, plp) > 0) { // come to the next covered position if (pos < beg || pos >= end) continue; // out of range; skip if (bed && bed_overlap(bed, h->target_name[tid], pos, pos + 1) == 0) continue; // not in BED; skip fputs(h->target_name[tid], stdout); printf("\t%d", pos+1); // a customized printf() would be faster for (i = 0; i < n; ++i) { // base level filters have to go here int j, m = 0; for (j = 0; j < n_plp[i]; ++j) { const bam_pileup1_t *p = plp[i] + j; // DON'T modfity plp[][] unless you really know if (p->is_del || p->is_refskip) ++m; // having dels or refskips at tid:pos else if (bam1_qual(p->b)[p->qpos] < baseQ) ++m; // low base quality } printf("\t%d", n_plp[i] - m); // this the depth to output } putchar('\n'); } free(n_plp); free(plp); bam_mplp_destroy(mplp); bam_header_destroy(h); for (i = 0; i < n; ++i) { bam_close(data[i]->fp); if (data[i]->iter) bam_iter_destroy(data[i]->iter); free(data[i]); } free(data); free(reg); if (bed) bed_destroy(bed); if ( file_list ) { for (i=0; i<n; i++) free(fn[i]); free(fn); } return 0; }
int bam_mpileup(int argc, char *argv[]) { int c; const char *file_list = NULL; char **fn = NULL; int nfiles = 0, use_orphan = 0, noref = 0; mplp_conf_t mplp; memset(&mplp, 0, sizeof(mplp_conf_t)); mplp.min_baseQ = 13; mplp.capQ_thres = 0; mplp.max_depth = 250; mplp.max_indel_depth = 250; mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100; mplp.min_frac = 0.002; mplp.min_support = 1; mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_SMART_OVERLAPS; mplp.argc = argc; mplp.argv = argv; mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP; mplp.output_fname = NULL; mplp.output_type = FT_VCF; mplp.record_cmd_line = 1; mplp.n_threads = 0; mplp.bsmpl = bam_smpl_init(); static const struct option lopts[] = { {"rf", required_argument, NULL, 1}, // require flag {"ff", required_argument, NULL, 2}, // filter flag {"incl-flags", required_argument, NULL, 1}, {"excl-flags", required_argument, NULL, 2}, {"output", required_argument, NULL, 3}, {"open-prob", required_argument, NULL, 4}, {"ignore-RG", no_argument, NULL, 5}, {"ignore-rg", no_argument, NULL, 5}, {"gvcf", required_argument, NULL, 'g'}, {"no-reference", no_argument, NULL, 7}, {"no-version", no_argument, NULL, 8}, {"threads",required_argument,NULL,9}, {"illumina1.3+", no_argument, NULL, '6'}, {"count-orphans", no_argument, NULL, 'A'}, {"bam-list", required_argument, NULL, 'b'}, {"no-BAQ", no_argument, NULL, 'B'}, {"no-baq", no_argument, NULL, 'B'}, {"adjust-MQ", required_argument, NULL, 'C'}, {"adjust-mq", required_argument, NULL, 'C'}, {"max-depth", required_argument, NULL, 'd'}, {"redo-BAQ", no_argument, NULL, 'E'}, {"redo-baq", no_argument, NULL, 'E'}, {"fasta-ref", required_argument, NULL, 'f'}, {"read-groups", required_argument, NULL, 'G'}, {"region", required_argument, NULL, 'r'}, {"regions", required_argument, NULL, 'r'}, {"regions-file", required_argument, NULL, 'R'}, {"targets", required_argument, NULL, 't'}, {"targets-file", required_argument, NULL, 'T'}, {"min-MQ", required_argument, NULL, 'q'}, {"min-mq", required_argument, NULL, 'q'}, {"min-BQ", required_argument, NULL, 'Q'}, {"min-bq", required_argument, NULL, 'Q'}, {"ignore-overlaps", no_argument, NULL, 'x'}, {"output-type", required_argument, NULL, 'O'}, {"samples", required_argument, NULL, 's'}, {"samples-file", required_argument, NULL, 'S'}, {"annotate", required_argument, NULL, 'a'}, {"ext-prob", required_argument, NULL, 'e'}, {"gap-frac", required_argument, NULL, 'F'}, {"tandem-qual", required_argument, NULL, 'h'}, {"skip-indels", no_argument, NULL, 'I'}, {"max-idepth", required_argument, NULL, 'L'}, {"min-ireads ", required_argument, NULL, 'm'}, {"per-sample-mF", no_argument, NULL, 'p'}, {"per-sample-mf", no_argument, NULL, 'p'}, {"platforms", required_argument, NULL, 'P'}, {NULL, 0, NULL, 0} }; while ((c = getopt_long(argc, argv, "Ag:f:r:R:q:Q:C:Bd:L:b:P:po:e:h:Im:F:EG:6O:xa:s:S:t:T:",lopts,NULL)) >= 0) { switch (c) { case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break; case 1 : mplp.rflag_require = bam_str2flag(optarg); if ( mplp.rflag_require<0 ) { fprintf(stderr,"Could not parse --rf %s\n", optarg); return 1; } break; case 2 : mplp.rflag_filter = bam_str2flag(optarg); if ( mplp.rflag_filter<0 ) { fprintf(stderr,"Could not parse --ff %s\n", optarg); return 1; } break; case 3 : mplp.output_fname = optarg; break; case 4 : mplp.openQ = atoi(optarg); break; case 5 : bam_smpl_ignore_readgroups(mplp.bsmpl); break; case 'g': mplp.gvcf = gvcf_init(optarg); if ( !mplp.gvcf ) error("Could not parse: --gvcf %s\n", optarg); break; case 'f': mplp.fai = fai_load(optarg); if (mplp.fai == NULL) return 1; mplp.fai_fname = optarg; break; case 7 : noref = 1; break; case 8 : mplp.record_cmd_line = 0; break; case 9 : mplp.n_threads = strtol(optarg, 0, 0); break; case 'd': mplp.max_depth = atoi(optarg); break; case 'r': mplp.reg_fname = strdup(optarg); break; case 'R': mplp.reg_fname = strdup(optarg); mplp.reg_is_file = 1; break; case 't': // In the original version the whole BAM was streamed which is inefficient // with few BED intervals and big BAMs. Todo: devise a heuristic to determine // best strategy, that is streaming or jumping. if ( optarg[0]=='^' ) optarg++; else mplp.bed_logic = 1; mplp.bed = regidx_init(NULL,regidx_parse_reg,NULL,0,NULL); mplp.bed_itr = regitr_init(mplp.bed); if ( regidx_insert_list(mplp.bed,optarg,',') !=0 ) { fprintf(stderr,"Could not parse the targets: %s\n", optarg); exit(EXIT_FAILURE); } break; case 'T': if ( optarg[0]=='^' ) optarg++; else mplp.bed_logic = 1; mplp.bed = regidx_init(optarg,NULL,NULL,0,NULL); if (!mplp.bed) { fprintf(stderr, "bcftools mpileup: Could not read file \"%s\"", optarg); return 1; } break; case 'P': mplp.pl_list = strdup(optarg); break; case 'p': mplp.flag |= MPLP_PER_SAMPLE; break; case 'B': mplp.flag &= ~MPLP_REALN; break; case 'I': mplp.flag |= MPLP_NO_INDEL; break; case 'E': mplp.flag |= MPLP_REDO_BAQ; break; case '6': mplp.flag |= MPLP_ILLUMINA13; break; case 's': if ( bam_smpl_add_samples(mplp.bsmpl,optarg,0)<0 ) error("Could not read samples: %s\n",optarg); break; case 'S': if ( bam_smpl_add_samples(mplp.bsmpl,optarg,1)<0 ) error("Could not read samples: %s\n",optarg); break; case 'O': switch (optarg[0]) { case 'b': mplp.output_type = FT_BCF_GZ; break; case 'u': mplp.output_type = FT_BCF; break; case 'z': mplp.output_type = FT_VCF_GZ; break; case 'v': mplp.output_type = FT_VCF; break; default: error("[error] The option \"-O\" changed meaning when mpileup moved to bcftools. Did you mean: \"bcftools mpileup --output-type\" or \"samtools mpileup --output-BP\"?\n"); } break; case 'C': mplp.capQ_thres = atoi(optarg); break; case 'q': mplp.min_mq = atoi(optarg); break; case 'Q': mplp.min_baseQ = atoi(optarg); break; case 'b': file_list = optarg; break; case 'o': { char *end; long value = strtol(optarg, &end, 10); // Distinguish between -o INT and -o FILE (a bit of a hack!) if (*end == '\0') mplp.openQ = value; else mplp.output_fname = optarg; } break; case 'e': mplp.extQ = atoi(optarg); break; case 'h': mplp.tandemQ = atoi(optarg); break; case 'A': use_orphan = 1; break; case 'F': mplp.min_frac = atof(optarg); break; case 'm': mplp.min_support = atoi(optarg); break; case 'L': mplp.max_indel_depth = atoi(optarg); break; case 'G': bam_smpl_add_readgroups(mplp.bsmpl, optarg, 1); break; case 'a': if (optarg[0]=='?') { list_annotations(stderr); return 1; } mplp.fmt_flag |= parse_format_flag(optarg); break; default: fprintf(stderr,"Invalid option: '%c'\n", c); return 1; } } if ( mplp.gvcf && !(mplp.fmt_flag&B2B_FMT_DP) ) { fprintf(stderr,"[warning] The -t DP option is required with --gvcf, switching on.\n"); mplp.fmt_flag |= B2B_FMT_DP; } if ( mplp.flag&(MPLP_BCF|MPLP_VCF|MPLP_NO_COMP) ) { if ( mplp.flag&MPLP_VCF ) { if ( mplp.flag&MPLP_NO_COMP ) mplp.output_type = FT_VCF; else mplp.output_type = FT_VCF_GZ; } else if ( mplp.flag&MPLP_BCF ) { if ( mplp.flag&MPLP_NO_COMP ) mplp.output_type = FT_BCF; else mplp.output_type = FT_BCF_GZ; } } if ( !(mplp.flag&MPLP_REALN) && mplp.flag&MPLP_REDO_BAQ ) { fprintf(stderr,"Error: The -B option cannot be combined with -E\n"); return 1; } if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN; if (argc == 1) { print_usage(stderr, &mplp); return 1; } if (!mplp.fai && !noref) { fprintf(stderr,"Error: mpileup requires the --fasta-ref option by default; use --no-reference to run without a fasta reference\n"); return 1; } int ret,i; if (file_list) { if ( read_file_list(file_list,&nfiles,&fn) ) return 1; mplp.files = fn; mplp.nfiles = nfiles; } else { mplp.nfiles = argc - optind; mplp.files = (char**) malloc(mplp.nfiles*sizeof(char*)); for (i=0; i<mplp.nfiles; i++) mplp.files[i] = strdup(argv[optind+i]); } ret = mpileup(&mplp); for (i=0; i<mplp.nfiles; i++) free(mplp.files[i]); free(mplp.files); free(mplp.reg_fname); free(mplp.pl_list); if (mplp.fai) fai_destroy(mplp.fai); if (mplp.bed) regidx_destroy(mplp.bed); if (mplp.bed_itr) regitr_destroy(mplp.bed_itr); if (mplp.reg) regidx_destroy(mplp.reg); bam_smpl_destroy(mplp.bsmpl); return ret; }
int bam_mpileup(int argc, char *argv[]) { int c; const char *file_list = NULL; char **fn = NULL; int nfiles = 0, use_orphan = 0; mplp_conf_t mplp; memset(&mplp, 0, sizeof(mplp_conf_t)); mplp.min_baseQ = 13; mplp.capQ_thres = 0; mplp.max_depth = 250; mplp.max_indel_depth = 250; mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100; mplp.min_frac = 0.002; mplp.min_support = 1; mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_SMART_OVERLAPS; mplp.argc = argc; mplp.argv = argv; mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP; mplp.output_fname = NULL; static const struct option lopts[] = { {"rf", required_argument, NULL, 1}, // require flag {"ff", required_argument, NULL, 2}, // filter flag {"incl-flags", required_argument, NULL, 1}, {"excl-flags", required_argument, NULL, 2}, {"output", required_argument, NULL, 3}, {"open-prob", required_argument, NULL, 4}, {"illumina1.3+", no_argument, NULL, '6'}, {"count-orphans", no_argument, NULL, 'A'}, {"bam-list", required_argument, NULL, 'b'}, {"no-BAQ", no_argument, NULL, 'B'}, {"no-baq", no_argument, NULL, 'B'}, {"adjust-MQ", required_argument, NULL, 'C'}, {"adjust-mq", required_argument, NULL, 'C'}, {"max-depth", required_argument, NULL, 'd'}, {"redo-BAQ", no_argument, NULL, 'E'}, {"redo-baq", no_argument, NULL, 'E'}, {"fasta-ref", required_argument, NULL, 'f'}, {"exclude-RG", required_argument, NULL, 'G'}, {"exclude-rg", required_argument, NULL, 'G'}, {"positions", required_argument, NULL, 'l'}, {"region", required_argument, NULL, 'r'}, {"ignore-RG", no_argument, NULL, 'R'}, {"ignore-rg", no_argument, NULL, 'R'}, {"min-MQ", required_argument, NULL, 'q'}, {"min-mq", required_argument, NULL, 'q'}, {"min-BQ", required_argument, NULL, 'Q'}, {"min-bq", required_argument, NULL, 'Q'}, {"ignore-overlaps", no_argument, NULL, 'x'}, {"BCF", no_argument, NULL, 'g'}, {"bcf", no_argument, NULL, 'g'}, {"VCF", no_argument, NULL, 'v'}, {"vcf", no_argument, NULL, 'v'}, {"output-BP", no_argument, NULL, 'O'}, {"output-bp", no_argument, NULL, 'O'}, {"output-MQ", no_argument, NULL, 's'}, {"output-mq", no_argument, NULL, 's'}, {"output-tags", required_argument, NULL, 't'}, {"uncompressed", no_argument, NULL, 'u'}, {"ext-prob", required_argument, NULL, 'e'}, {"gap-frac", required_argument, NULL, 'F'}, {"tandem-qual", required_argument, NULL, 'h'}, {"skip-indels", no_argument, NULL, 'I'}, {"max-idepth", required_argument, NULL, 'L'}, {"min-ireads ", required_argument, NULL, 'm'}, {"per-sample-mF", no_argument, NULL, 'p'}, {"per-sample-mf", no_argument, NULL, 'p'}, {"platforms", required_argument, NULL, 'P'}, {NULL, 0, NULL, 0} }; while ((c = getopt_long(argc, argv, "Agf:r:l:q:Q:uRC:BDSd:L:b:P:po:e:h:Im:F:EG:6OsVvxt:",lopts,NULL)) >= 0) { switch (c) { case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break; case 1 : mplp.rflag_require = bam_str2flag(optarg); if ( mplp.rflag_require<0 ) { fprintf(stderr,"Could not parse --rf %s\n", optarg); return 1; } break; case 2 : mplp.rflag_filter = bam_str2flag(optarg); if ( mplp.rflag_filter<0 ) { fprintf(stderr,"Could not parse --ff %s\n", optarg); return 1; } break; case 3 : mplp.output_fname = optarg; break; case 4 : mplp.openQ = atoi(optarg); break; case 'f': mplp.fai = fai_load(optarg); if (mplp.fai == 0) return 1; mplp.fai_fname = optarg; break; case 'd': mplp.max_depth = atoi(optarg); break; case 'r': mplp.reg = strdup(optarg); break; case 'l': // In the original version the whole BAM was streamed which is inefficient // with few BED intervals and big BAMs. Todo: devise a heuristic to determine // best strategy, that is streaming or jumping. mplp.bed = bed_read(optarg); if (!mplp.bed) { print_error_errno("Could not read file \"%s\"", optarg); return 1; } break; case 'P': mplp.pl_list = strdup(optarg); break; case 'p': mplp.flag |= MPLP_PER_SAMPLE; break; case 'g': mplp.flag |= MPLP_BCF; break; case 'v': mplp.flag |= MPLP_BCF | MPLP_VCF; break; case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_BCF; break; case 'B': mplp.flag &= ~MPLP_REALN; break; case 'D': mplp.fmt_flag |= B2B_FMT_DP; fprintf(stderr, "[warning] samtools mpileup option `-D` is functional, but deprecated. Please switch to `-t DP` in future.\n"); break; case 'S': mplp.fmt_flag |= B2B_FMT_SP; fprintf(stderr, "[warning] samtools mpileup option `-S` is functional, but deprecated. Please switch to `-t SP` in future.\n"); break; case 'V': mplp.fmt_flag |= B2B_FMT_DV; fprintf(stderr, "[warning] samtools mpileup option `-V` is functional, but deprecated. Please switch to `-t DV` in future.\n"); break; case 'I': mplp.flag |= MPLP_NO_INDEL; break; case 'E': mplp.flag |= MPLP_REDO_BAQ; break; case '6': mplp.flag |= MPLP_ILLUMINA13; break; case 'R': mplp.flag |= MPLP_IGNORE_RG; break; case 's': mplp.flag |= MPLP_PRINT_MAPQ; break; case 'O': mplp.flag |= MPLP_PRINT_POS; break; case 'C': mplp.capQ_thres = atoi(optarg); break; case 'q': mplp.min_mq = atoi(optarg); break; case 'Q': mplp.min_baseQ = atoi(optarg); break; case 'b': file_list = optarg; break; case 'o': { char *end; long value = strtol(optarg, &end, 10); // Distinguish between -o INT and -o FILE (a bit of a hack!) if (*end == '\0') mplp.openQ = value; else mplp.output_fname = optarg; } break; case 'e': mplp.extQ = atoi(optarg); break; case 'h': mplp.tandemQ = atoi(optarg); break; case 'A': use_orphan = 1; break; case 'F': mplp.min_frac = atof(optarg); break; case 'm': mplp.min_support = atoi(optarg); break; case 'L': mplp.max_indel_depth = atoi(optarg); break; case 'G': { FILE *fp_rg; char buf[1024]; mplp.rghash = khash_str2int_init(); if ((fp_rg = fopen(optarg, "r")) == 0) fprintf(stderr, "(%s) Fail to open file %s. Continue anyway.\n", __func__, optarg); while (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but forgive me... khash_str2int_inc(mplp.rghash, strdup(buf)); fclose(fp_rg); } break; case 't': mplp.fmt_flag |= parse_format_flag(optarg); break; default: fprintf(stderr,"Invalid option: '%c'\n", c); return 1; } } if ( !(mplp.flag&MPLP_REALN) && mplp.flag&MPLP_REDO_BAQ ) { fprintf(stderr,"Error: The -B option cannot be combined with -E\n"); return 1; } if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN; if (argc == 1) { print_usage(stderr, &mplp); return 1; } int ret; if (file_list) { if ( read_file_list(file_list,&nfiles,&fn) ) return 1; ret = mpileup(&mplp,nfiles,fn); for (c=0; c<nfiles; c++) free(fn[c]); free(fn); } else ret = mpileup(&mplp, argc - optind, argv + optind); if (mplp.rghash) khash_str2int_destroy_free(mplp.rghash); free(mplp.reg); free(mplp.pl_list); if (mplp.fai) fai_destroy(mplp.fai); if (mplp.bed) bed_destroy(mplp.bed); return ret; }
int main_depth(int argc, char *argv[]) { int i, n, tid, beg, end, pos, *n_plp, baseQ = 0, mapQ = 0, min_len = 0, status = EXIT_SUCCESS, nfiles; const bam_pileup1_t **plp; char *reg = 0; // specified region void *bed = 0; // BED data structure char *file_list = NULL, **fn = NULL; bam_hdr_t *h = NULL; // BAM header of the 1st input aux_t **data; bam_mplp_t mplp; // parse the command line while ((n = getopt(argc, argv, "r:b:q:Q:l:f:")) >= 0) { switch (n) { case 'l': min_len = atoi(optarg); break; // minimum query length case 'r': reg = strdup(optarg); break; // parsing a region requires a BAM header case 'b': bed = bed_read(optarg); // BED or position list file can be parsed now if (!bed) { print_error_errno("Could not read file \"%s\"", optarg); return 1; } break; case 'q': baseQ = atoi(optarg); break; // base quality threshold case 'Q': mapQ = atoi(optarg); break; // mapping quality threshold case 'f': file_list = optarg; break; } } if (optind == argc && !file_list) { fprintf(pysamerr, "\n"); fprintf(pysamerr, "Usage: samtools depth [options] in1.bam [in2.bam [...]]\n"); fprintf(pysamerr, "Options:\n"); fprintf(pysamerr, " -b <bed> list of positions or regions\n"); fprintf(pysamerr, " -f <list> list of input BAM filenames, one per line [null]\n"); fprintf(pysamerr, " -l <int> read length threshold (ignore reads shorter than <int>)\n"); fprintf(pysamerr, " -q <int> base quality threshold\n"); fprintf(pysamerr, " -Q <int> mapping quality threshold\n"); fprintf(pysamerr, " -r <chr:from-to> region\n"); fprintf(pysamerr, "\n"); return 1; } // initialize the auxiliary data structures if (file_list) { if ( read_file_list(file_list,&nfiles,&fn) ) return 1; n = nfiles; argv = fn; optind = 0; } else n = argc - optind; // the number of BAMs on the command line data = calloc(n, sizeof(aux_t*)); // data[i] for the i-th input beg = 0; end = 1<<30; // set the default region for (i = 0; i < n; ++i) { data[i] = calloc(1, sizeof(aux_t)); data[i]->fp = sam_open(argv[optind+i], "r"); // open BAM if (data[i]->fp == NULL) { print_error_errno("Could not open \"%s\"", argv[optind+i]); status = EXIT_FAILURE; goto depth_end; } if (hts_set_opt(data[i]->fp, CRAM_OPT_REQUIRED_FIELDS, SAM_FLAG | SAM_RNAME | SAM_POS | SAM_MAPQ | SAM_CIGAR | SAM_SEQ)) { fprintf(pysamerr, "Failed to set CRAM_OPT_REQUIRED_FIELDS value\n"); return 1; } if (hts_set_opt(data[i]->fp, CRAM_OPT_DECODE_MD, 0)) { fprintf(pysamerr, "Failed to set CRAM_OPT_DECODE_MD value\n"); return 1; } data[i]->min_mapQ = mapQ; // set the mapQ filter data[i]->min_len = min_len; // set the qlen filter data[i]->hdr = sam_hdr_read(data[i]->fp); // read the BAM header if (reg) { // if a region is specified hts_idx_t *idx = sam_index_load(data[i]->fp, argv[optind+i]); // load the index if (idx == NULL) { print_error("can't load index for \"%s\"", argv[optind+i]); status = EXIT_FAILURE; goto depth_end; } data[i]->iter = sam_itr_querys(idx, data[i]->hdr, reg); // set the iterator hts_idx_destroy(idx); // the index is not needed any more; free the memory if (data[i]->iter == NULL) { print_error("can't parse region \"%s\"", reg); status = EXIT_FAILURE; goto depth_end; } } } h = data[0]->hdr; // easy access to the header of the 1st BAM if (reg) { beg = data[0]->iter->beg; // and to the parsed region coordinates end = data[0]->iter->end; } // the core multi-pileup loop mplp = bam_mplp_init(n, read_bam, (void**)data); // initialization n_plp = calloc(n, sizeof(int)); // n_plp[i] is the number of covering reads from the i-th BAM plp = calloc(n, sizeof(bam_pileup1_t*)); // plp[i] points to the array of covering reads (internal in mplp) while (bam_mplp_auto(mplp, &tid, &pos, n_plp, plp) > 0) { // come to the next covered position if (pos < beg || pos >= end) continue; // out of range; skip if (bed && bed_overlap(bed, h->target_name[tid], pos, pos + 1) == 0) continue; // not in BED; skip fputs(h->target_name[tid], stdout); printf("\t%d", pos+1); // a customized printf() would be faster for (i = 0; i < n; ++i) { // base level filters have to go here int j, m = 0; for (j = 0; j < n_plp[i]; ++j) { const bam_pileup1_t *p = plp[i] + j; // DON'T modfity plp[][] unless you really know if (p->is_del || p->is_refskip) ++m; // having dels or refskips at tid:pos else if (bam_get_qual(p->b)[p->qpos] < baseQ) ++m; // low base quality } printf("\t%d", n_plp[i] - m); // this the depth to output } putchar('\n'); } free(n_plp); free(plp); bam_mplp_destroy(mplp); depth_end: for (i = 0; i < n && data[i]; ++i) { bam_hdr_destroy(data[i]->hdr); if (data[i]->fp) sam_close(data[i]->fp); hts_itr_destroy(data[i]->iter); free(data[i]); } free(data); free(reg); if (bed) bed_destroy(bed); if ( file_list ) { for (i=0; i<n; i++) free(fn[i]); free(fn); } return status; }
static int read_directory(const char * dname) { int ret = 0; solist_t files; solist_t dirs; strobj_t sobj; //d_dbg("%s: dname = '%s'\n",__func__,dname); do { files = solist_new(); dirs = solist_new(); if (!files || !dirs) { ret = -1; verbose("solist_new() return error!\n"); break; } ret = read_file_list(dname, files, dirs); if (ret < 0) { verbose("error reading file list @ '%s' !\n", dname); break; } /* handle files first */ ret = 0; while ((sobj=solist_get_next(files)) != NULL) { ret = read_file(sobj_get_string(sobj)); if (ret < 0) { verbose("error handling file - '%s'\n",sobj_get_string(sobj)); break; } } if (ret < 0) break; /* handle diretories if recursive. */ if (o_recursive) { ret = 0; while ((sobj=solist_get_next(dirs)) != NULL) { ret = read_directory(sobj_get_string(sobj)); if (ret < 0) { verbose("error handling directory - '%s'\n",sobj_get_string(sobj)); break; } } if (ret < 0) break; } } while (0); if (files) solist_del(files); if (dirs) solist_del(dirs); return ret; }