int main_vcfview(int argc, char *argv[]) { int c, clevel = -1, in_type = FT_BCF, out_type = FT_VCF; char *fname_out = NULL, moder[8], modew[8]; while ((c = getopt(argc, argv, "l:bvo:n:z?hu")) >= 0) { switch (c) { case 'o': switch (optarg[0]) { case 'b': out_type = FT_BCF_GZ; break; case 'u': out_type = FT_BCF; break; case 'z': out_type = FT_VCF_GZ; break; case 'v': out_type = FT_VCF; break; default: error("The output type \"%s\" not recognised\n", optarg); } break; case 'l': clevel = atoi(optarg); out_type |= FT_GZ; break; case 'v': in_type = FT_VCF; break; case 'b': out_type = FT_BCF_GZ; break; case 'u': out_type = FT_BCF; break; case 'z': out_type = FT_VCF_GZ; break; case 'n': fname_out = optarg; break; case '?': case 'h': usage(); return 1; break; } } if (argc!=optind+1) { usage(); return 1; } // Init reader strcpy(moder, "r"); if ( (!strcmp("-",argv[optind]) && (in_type & FT_BCF)) || (hts_file_type(argv[optind]) & FT_BCF)) strcat(moder, "b"); htsFile *fp_in = hts_open(argv[optind], moder, NULL); if ( !fp_in ) error("Fail to open: %s\n", argv[optind]); bcf_hdr_t *hdr = vcf_hdr_read(fp_in); if ( !hdr ) error("Fail to read VCF/BCF header: %s\n", argv[optind]); bcf1_t *rec = bcf_init1(); // Init writer strcpy(modew, "w"); if (clevel >= 0 && clevel <= 9) sprintf(modew + 1, "%d", clevel); if (out_type & FT_GZ) strcat(modew,"z"); if (out_type & FT_BCF) strcat(modew, "b"); if (out_type == FT_BCF) strcat(modew, "u"); // uncompressed BCF output htsFile *fp_out = hts_open(fname_out ? fname_out : "-", modew, NULL); vcf_hdr_write(fp_out, hdr); while ( vcf_read1(fp_in, hdr, rec) >= 0) vcf_write1(fp_out, hdr, rec); bcf_destroy1(rec); bcf_hdr_destroy(hdr); hts_close(fp_in); hts_close(fp_out); return 0; }
int main_vcfview(int argc, char *argv[]) { int i, c, clevel = -1, flag = 0, n_samples = -1, *imap = 0, excl_snp = 0, excl_indel = 0; char *fn_ref = 0, *fn_out = 0, moder[8], **samples = 0; bcf_hdr_t *h, *hsub = 0; htsFile *in; bcf1_t *b; while ((c = getopt(argc, argv, "l:bSt:o:T:s:GNI")) >= 0) { switch (c) { case 'l': clevel = atoi(optarg); flag |= 2; break; case 'S': flag |= 1; break; case 'b': flag |= 2; break; case 'G': n_samples = 0; break; case 't': fn_ref = optarg; flag |= 1; break; case 'o': fn_out = optarg; break; case 's': samples = hts_readlines(optarg, &n_samples); break; case 'N': excl_snp = 1; break; case 'I': excl_indel = 1; break; } } if (argc == optind) { fprintf(stderr, "\nUsage: vcfview [options] <in.bcf>|<in.vcf>|<in.vcf.gz>\n\n"); fprintf(stderr, "Options: -b output in BCF\n"); fprintf(stderr, " -S input is VCF\n"); fprintf(stderr, " -o FILE output file name [stdout]\n"); fprintf(stderr, " -l INT compression level [%d]\n", clevel); fprintf(stderr, " -t FILE list of reference names and lengths [null]\n"); fprintf(stderr, " -s FILE/STR list of samples (STR if started with ':'; FILE otherwise) [null]\n"); fprintf(stderr, " -G drop individual genotype information\n"); fprintf(stderr, " -N exclude SNPs\n"); fprintf(stderr, " -I exclude INDELs\n"); fprintf(stderr, "\n"); return 1; } strcpy(moder, "r"); if ((flag&1) == 0 && !(file_type(argv[optind])&(IS_VCF|IS_VCF_GZ))) strcat(moder, "b"); in = hts_open(argv[optind], moder, fn_ref); h = vcf_hdr_read(in); if (h == 0) { fprintf(stderr, "[E::%s] fail to read the VCF/BCF2 header\n", __func__); hts_close(in); return 1; } if (n_samples >= 0) { if (n_samples) imap = (int*)malloc(n_samples * sizeof(int)); hsub = bcf_hdr_subset(h, n_samples, samples, imap); } b = bcf_init1(); if ((flag&4) == 0) { // VCF/BCF output htsFile *out; char modew[8]; strcpy(modew, "w"); if (clevel >= 0 && clevel <= 9) sprintf(modew + 1, "%d", clevel); if (flag&2) strcat(modew, "b"); out = hts_open(fn_out? fn_out : "-", modew, 0); vcf_hdr_write(out, hsub? hsub : h); if (optind + 1 < argc && !(flag&1)) { // BAM input and has a region hts_idx_t *idx; if ((idx = bcf_index_load(argv[optind])) == 0) { fprintf(stderr, "[E::%s] fail to load the BCF index\n", __func__); return 1; } for (i = optind + 1; i < argc; ++i) { hts_itr_t *iter; if ((iter = bcf_itr_querys(idx, h, argv[i])) == 0) { fprintf(stderr, "[E::%s] fail to parse region '%s'\n", __func__, argv[i]); continue; } while (bcf_itr_next((BGZF*)in->fp, iter, b) >= 0) { if (excl_snp && bcf_is_snp(b)) continue; if (excl_indel && !bcf_is_snp(b)) continue; if (n_samples >= 0) { bcf_subset(h, b, n_samples, imap); vcf_write1(out, hsub, b); } else vcf_write1(out, h, b); } hts_itr_destroy(iter); } hts_idx_destroy(idx); } else { while (vcf_read1(in, h, b) >= 0) { if (excl_snp && bcf_is_snp(b)) continue; if (excl_indel && !bcf_is_snp(b)) continue; if (n_samples >= 0) { bcf_subset(h, b, n_samples, imap); vcf_write1(out, hsub, b); } else vcf_write1(out, h, b); } } hts_close(out); } bcf_destroy1(b); if (n_samples > 0) { for (i = 0; i < n_samples; ++i) free(samples[i]); free(samples); bcf_hdr_destroy(hsub); free(imap); } bcf_hdr_destroy(h); hts_close(in); return 0; }
int main_view(int argc, char *argv[]) { int i, c, n_files = 0, out_bcf = 0, clevel = -1, multi_flag = 0, excl = 0, not_vcf = 0, in_mem = 0, u_set = 0; long seekn = -1, n_rec = LONG_MAX, n_read = 0; bgtm_t *bm = 0; bcf1_t *b; htsFile *out = 0; char modew[8], *reg = 0, *site_flt = 0; void *bed = 0; int n_groups = 0; char *gexpr[BGT_MAX_GROUPS], *aexpr = 0, *dbfn = 0, *fmt = 0; bgt_file_t **files = 0; fmf_t *vardb = 0; while ((c = getopt(argc, argv, "ubs:r:l:CMGB:ef:g:a:i:n:SHt:d:")) >= 0) { if (c == 'b') out_bcf = 1; else if (c == 'r') reg = optarg; else if (c == 'l') clevel = atoi(optarg); else if (c == 'e') excl = 1; else if (c == 'u') u_set = 1; else if (c == 'B') bed = bed_read(optarg); else if (c == 'C') multi_flag |= BGT_F_SET_AC; else if (c == 'G') multi_flag |= BGT_F_NO_GT; else if (c == 'S') multi_flag |= BGT_F_NO_GT | BGT_F_CNT_AL, not_vcf = 1; else if (c == 'H') multi_flag |= BGT_F_NO_GT | BGT_F_CNT_HAP, not_vcf = 1; else if (c == 'M') in_mem = 1; else if (c == 'i') seekn = atol(optarg) - 1; else if (c == 'n') n_rec = atol(optarg); else if (c == 'f') site_flt = optarg; else if (c == 't') fmt = optarg, not_vcf = 1; else if (c == 'd') dbfn = optarg; else if (c == 's' && n_groups < BGT_MAX_GROUPS) gexpr[n_groups++] = optarg; else if (c == 'a') aexpr = optarg; } if (n_rec < 0) { fprintf(stderr, "[E::%s] option -n must be at least 0.\n", __func__); return 1; } if (clevel > 9) clevel = 9; if (u_set) clevel = 0, out_bcf = 1; if (n_groups > 1) multi_flag |= BGT_F_SET_AC; if (argc - optind < 1) { fprintf(stderr, "Usage: bgt %s [options] <bgt-prefix> [...]", argv[0]); fputc('\n', stderr); fprintf(stderr, "Options:\n"); fprintf(stderr, " Sample selection:\n"); fprintf(stderr, " -s EXPR samples list (,sample1,sample2 or a file or expr; see Notes below) [all]\n"); fprintf(stderr, " Site selection:\n"); fprintf(stderr, " -r STR region [all]\n"); fprintf(stderr, " -B FILE extract variants overlapping BED FILE []\n"); fprintf(stderr, " -e exclude variants overlapping BED FILE (effective with -B)\n"); fprintf(stderr, " -i INT process from the INT-th record (1-based) []\n"); fprintf(stderr, " -n INT process at most INT records []\n"); fprintf(stderr, " -d FILE variant annotations in FMF (to work with -a) []\n"); fprintf(stderr, " -M load variant annotations in RAM (only with -d)\n"); fprintf(stderr, " -a EXPR alleles list chr:1basedPos:refLen:seq (,allele1,allele2 or a file or expr) []\n"); fprintf(stderr, " -f STR frequency filters []\n"); fprintf(stderr, " VCF output:\n"); fprintf(stderr, " -b BCF output (effective without -S/-H)\n"); fprintf(stderr, " -l INT compression level for BCF [default]\n"); fprintf(stderr, " -u equivalent to -bl0 (overriding -b and -l)\n"); fprintf(stderr, " -G don't output sample genotypes\n"); fprintf(stderr, " -C write AC/AN to the INFO field (auto applied with -f or multipl -s)\n"); fprintf(stderr, " Non-VCF output:\n"); fprintf(stderr, " -S show samples with a set of alleles (with -a)\n"); fprintf(stderr, " -H count of haplotypes with a set of alleles (with -a)\n"); fprintf(stderr, " -t STR comma-delimited list of fields to output. Accepted variables:\n"); fprintf(stderr, " AC, AN, AC#, AN#, CHROM, POS, END, REF, ALT (# for a group number)\n"); fprintf(stderr, "Notes:\n"); fprintf(stderr, " For option -s/-a, EXPR can be one of:\n"); fprintf(stderr, " 1) comma-delimited list following a colon/comma. e.g. -s,NA12878,NA12044\n"); fprintf(stderr, " 2) space-delimited file with the first column giving a sample/allele name. e.g. -s list.txt\n"); fprintf(stderr, " 3) expression if .spl/-d file contains metadata. e.g.: -s\"gender=='M'&&population!='CEU'\"\n"); fprintf(stderr, " If multiple -s is specified, the AC/AN of the first group will be written to VCF INFO AC1/AN1,\n"); fprintf(stderr, " the second to AC2/AN2, etc.\n"); return 1; } if (dbfn && in_mem) vardb = fmf_read(dbfn), dbfn = 0; if ((multi_flag&(BGT_F_CNT_AL|BGT_F_CNT_HAP)) && aexpr == 0) { fprintf(stderr, "[E::%s] -a must be specified when -S/-H is in use.\n", __func__); return 1; } n_files = argc - optind; files = (bgt_file_t**)calloc(n_files, sizeof(bgt_file_t*)); for (i = 0; i < n_files; ++i) { files[i] = bgt_open(argv[optind+i]); if (files[i] == 0) { fprintf(stderr, "[E::%s] failed to open BGT with prefix '%s'\n", __func__, argv[optind+i]); return 1; // FIXME: memory leak } } bm = bgtm_reader_init(n_files, files); bgtm_set_flag(bm, multi_flag); if (site_flt && bgtm_set_flt_site(bm, site_flt) != 0) { fprintf(stderr, "[E::%s] failed to set frequency filters. Syntax error?\n", __func__); return 1; } if (reg && bgtm_set_region(bm, reg) < 0) { fprintf(stderr, "[E::%s] failed to set region. Region format error?\n", __func__); return 1; } if (bed) bgtm_set_bed(bm, bed, excl); if (fmt && bgtm_set_table(bm, fmt) < 0) { fprintf(stderr, "[E::%s] failed to set tabular output.\n", __func__); return 1; } if (seekn > 0) bgtm_set_start(bm, seekn); if (aexpr) { int n_al; n_al = bgtm_set_alleles(bm, aexpr, vardb, dbfn); if (n_al < 0) { fprintf(stderr, "[E::%s] failed to set alleles.\n", __func__); return 1; } else if (n_al == 0) fprintf(stderr, "[W::%s] no alleles selected.\n", __func__); } for (i = 0; i < n_groups; ++i) { if (bgtm_add_group(bm, gexpr[i]) < 0) { fprintf(stderr, "[E::%s] failed to add sample group '%s'.\n", __func__, gexpr[i]); return 1; } } bgtm_prepare(bm); // bgtm_prepare() generates the VCF header if (!not_vcf) { strcpy(modew, "w"); if (out_bcf) strcat(modew, "b"); sprintf(modew + strlen(modew), "%d", clevel); out = hts_open("-", modew, 0); vcf_hdr_write(out, bm->h_out); } b = bcf_init1(); while (bgtm_read(bm, b) >= 0 && n_read < n_rec) { if (out) vcf_write1(out, bm->h_out, b); if (fmt && bm->n_fields > 0) puts(bm->tbl_line.s); ++n_read; } bcf_destroy1(b); if (not_vcf && bm->n_aal > 0) { if (bm->flag & BGT_F_CNT_HAP) { bgt_hapcnt_t *hc; int n_hap; char *s; hc = bgtm_hapcnt(bm, &n_hap); s = bgtm_hapcnt_print_destroy(bm, n_hap, hc); fputs(s, stdout); free(s); } if (bm->flag & BGT_F_CNT_AL) { char *s; if ((s = bgtm_alcnt_print(bm)) != 0) fputs(s, stdout); free(s); } } if (out) hts_close(out); bgtm_reader_destroy(bm); if (bed) bed_destroy(bed); for (i = 0; i < n_files; ++i) bgt_close(files[i]); free(files); if (vardb) fmf_destroy(vardb); return 0; }