/*
 * Print, one per line on stdout, the sequence names listed in the index
 * that accompanies `fname`.
 *
 * The file type reported by file_type() selects the index flavour:
 *   - text (IS_TXT bit set) or unrecognised (0): tabix index
 *   - IS_BCF: BCF index, names resolved through the BCF header
 *   - IS_BAM: not implemented, reported through error()
 *
 * Failures are reported through error(); presumably error() does not return
 * (verify against its definition).  Always returns 0.
 */
static int query_chroms(char *fname)
{
    const int kind = file_type(fname);
    const char **names;
    int n_names, k;

    if ( !kind || (kind & IS_TXT) )
    {
        tbx_t *tabix = tbx_index_load(fname);
        if ( tabix == NULL ) error("Could not load .tbi index of %s\n", fname);

        names = tbx_seqnames(tabix, &n_names);
        for (k = 0; k < n_names; ++k)
            printf("%s\n", names[k]);

        free(names);
        tbx_destroy(tabix);
        return 0;
    }

    if ( kind == IS_BCF )
    {
        htsFile *handle = hts_open(fname, "r");
        if ( handle == NULL ) error("Could not read %s\n", fname);

        /* Only the header is needed from the file itself. */
        bcf_hdr_t *header = bcf_hdr_read(handle);
        if ( header == NULL ) error("Could not read the header: %s\n", fname);
        hts_close(handle);

        hts_idx_t *index = bcf_index_load(fname);
        if ( index == NULL ) error("Could not load .csi index of %s\n", fname);

        names = bcf_index_seqnames(index, header, &n_names);
        for (k = 0; k < n_names; ++k)
            printf("%s\n", names[k]);

        free(names);
        bcf_hdr_destroy(header);
        hts_idx_destroy(index);
        return 0;
    }

    if ( kind == IS_BAM )   /* todo: BAM */
        error("BAM: todo\n");

    return 0;
}
/**
 * Loads the index belonging to the ith input file.
 *
 * A BCF index is loaded for FT_BCF_GZ files and a tabix index for
 * FT_VCF_GZ files; any other file type needs no index here.
 *
 * @param i position of the file in vcf_files
 * @return false if an index was required but could not be loaded,
 *         true otherwise
 */
bool BCFSyncedStreamReader::load_index(int32_t i)
{
    if (ftypes[i]==FT_BCF_GZ)
    {
        idxs[i] = bcf_index_load(vcf_files[i].c_str());
        return idxs[i] != NULL;
    }

    if (ftypes[i]==FT_VCF_GZ)
    {
        tbxs[i] = tbx_index_load(vcf_files[i].c_str());
        return tbxs[i] != NULL;
    }

    // nothing to load for other file types
    return true;
}
/**
 * Loads the index belonging to the ith input file.
 *
 * Only bgzf-compressed files are handled: a BCF index is loaded for BCF
 * input and a tabix index for VCF input.
 *
 * @param i position of the file in file_names
 * @return false if an index was required but could not be loaded,
 *         true otherwise
 */
bool BCFSyncedReader::load_index(int32_t i)
{
    // files that are not bgzf-compressed have no index to load
    if (ftypes[i].compression!=bgzf)
    {
        return true;
    }

    if (ftypes[i].format==bcf)
    {
        idxs[i] = bcf_index_load(file_names[i].c_str());
        return idxs[i] != NULL;
    }

    if (ftypes[i].format==vcf)
    {
        tbxs[i] = tbx_index_load(file_names[i].c_str());
        return tbxs[i] != NULL;
    }

    return true;
}
/*
 * Print record counts stored in the index of a VCF/BCF file.
 *
 * For every indexed sequence that has at least one record a line
 * "<name>\t<length or '.'>\t<records>" is written, where the length comes
 * from the "length" key of the matching contig header line.  When bit 2 of
 * `stats` is set, only the total number of records is written instead.
 *
 * fname  path of the VCF (TBI index) or BCF (CSI index) file
 * stats  bit flags; only bit 2 (totals only) is inspected here
 *
 * Returns 0 on success and 1 on any failure.  Every exit goes through a
 * single cleanup path so the file handle, header, index and name list are
 * released on errors as well.
 */
int vcf_index_stats(char *fname, int stats)
{
    char *fn_out = NULL;
    FILE *out = fn_out ? fopen(fn_out, "w") : stdout;
    const char **seq = NULL;
    int i, nseq = 0, ret = 1;
    uint64_t sum = 0;
    tbx_t *tbx = NULL;
    hts_idx_t *idx = NULL;
    bcf_hdr_t *hdr = NULL;
    htsFile *fp = NULL;

    if ( !out ) { fprintf(stderr,"Could not write to %s\n", fn_out); return 1; }

    fp = hts_open(fname,"r");
    if ( !fp ) { fprintf(stderr,"Could not read %s\n", fname); goto cleanup; }

    hdr = bcf_hdr_read(fp);
    if ( !hdr ) { fprintf(stderr,"Could not read the header: %s\n", fname); goto cleanup; }

    if ( hts_get_format(fp)->format==vcf )
    {
        tbx = tbx_index_load(fname);
        if ( !tbx ) { fprintf(stderr,"Could not load TBI index: %s\n", fname); goto cleanup; }
    }
    else if ( hts_get_format(fp)->format==bcf )
    {
        idx = bcf_index_load(fname);
        if ( !idx ) { fprintf(stderr,"Could not load CSI index: %s\n", fname); goto cleanup; }
    }
    else
    {
        fprintf(stderr,"Could not detect the file type as VCF or BCF: %s\n", fname);
        goto cleanup;
    }

    seq = tbx ? tbx_seqnames(tbx, &nseq) : bcf_index_seqnames(idx, hdr, &nseq);
    for (i=0; i<nseq; i++)
    {
        uint64_t records, v;
        hts_idx_get_stat(tbx ? tbx->idx : idx, i, &records, &v);
        sum += records;
        if ( (stats&2) || !records ) continue;
        bcf_hrec_t *hrec = bcf_hdr_get_hrec(hdr, BCF_HL_CTG, "ID", seq[i], NULL);
        int hkey = hrec ? bcf_hrec_find_key(hrec, "length") : -1;
        fprintf(out,"%s\t%s\t%" PRIu64 "\n", seq[i], hkey<0?".":hrec->vals[hkey], records);
    }

    if ( !sum )
    {
        // No counts found.
        // Is this because index version has no stored count data, or no records?
        bcf1_t *rec = bcf_init1();
        int has_record = rec && bcf_read1(fp, hdr, rec) >= 0;
        if ( rec ) bcf_destroy1(rec);
        if ( has_record )
        {
            fprintf(stderr,"%s index of %s does not contain any count metadata. Please re-index with a newer version of bcftools or tabix.\n", tbx ? "TBI" : "CSI", fname);
            goto cleanup;
        }
    }
    if ( stats&2 ) fprintf(out, "%" PRIu64 "\n", sum);
    ret = 0;

cleanup:
    free(seq);
    // stdout belongs to the whole process: flush it, but only close files opened here
    if ( out == stdout ) fflush(out);
    else fclose(out);
    if ( fp ) hts_close(fp);
    if ( hdr ) bcf_hdr_destroy(hdr);
    if ( tbx ) tbx_destroy(tbx);
    if ( idx ) hts_idx_destroy(idx);
    return ret;
}
int main(int argc, char* argv[]) { namespace po = boost::program_options; std::string file; std::string output; try { // Declare the supported options. po::options_description desc("Allowed options"); desc.add_options() ("help,h", "produce help message") ("version", "Show version") ("input-file", po::value< std::string >(), "The input files") ("output-file", po::value<std::string>(), "The output file name.") ; po::positional_options_description popts; popts.add("input-file", 1); popts.add("output-file", 1); po::options_description cmdline_options; cmdline_options .add(desc) ; po::variables_map vm; po::store(po::command_line_parser(argc, argv). options(cmdline_options).positional(popts).run(), vm); po::notify(vm); if (vm.count("version")) { std::cout << "vcfhdr2json version " << HAPLOTYPES_VERSION << "\n"; return 0; } if (vm.count("help")) { std::cout << desc << "\n"; return 1; } if (vm.count("input-file")) { file = vm["input-file"].as< std::string > (); } if (vm.count("output-file")) { output = vm["output-file"].as< std::string >(); } if(file.size() == 0) { std::cerr << "Please specify an input file.\n"; return 1; } if (output == "") { std::cerr << "Please specify an output file.\n"; return 1; } } catch (po::error & e) { std::cerr << e.what() << "\n"; return 1; } try { Json::StyledWriter writer; htsFile * fp = bcf_open(file.c_str(), "r"); bcf_hdr_t * hdr = bcf_hdr_read(fp); Json::Value root; Json::Value a; for (int i = 0; i < bcf_hdr_nsamples(hdr); ++i) { a.append(hdr->samples[i]); } root["samples"] = a; Json::Value fields; for (int i = 0; i < hdr->nhrec; i++) { Json::Value field; field["key"] = hdr->hrec[i]->key; if (!hdr->hrec[i]->value) { Json::Value values; for (int j = 0; j < hdr->hrec[i]->nkeys; j++) { values[hdr->hrec[i]->keys[j]] = hdr->hrec[i]->vals[j]; } field["values"] = values; } else { field["value"] = hdr->hrec[i]->value; } fields.append(field); } root["fields"] = fields; tbx_t * tbx_idx = tbx_index_load(file.c_str()); if ( !tbx_idx ) { hts_idx_t * 
csi_idx = bcf_index_load(file.c_str()); if(!csi_idx) { root["tabix"] = Json::Value::null; } else { root["tabix"] = Json::Value(); root["tabix"]["chromosomes"] = Json::Value(); int count = 0; const char ** tbx_names = bcf_index_seqnames(csi_idx, hdr, &count); for (int i = 0; i < count; ++i) { root["tabix"]["chromosomes"].append(tbx_names[i]); } free(tbx_names); hts_idx_destroy(csi_idx); } } else { root["tabix"] = Json::Value(); root["tabix"]["chromosomes"] = Json::Value(); int count = 0; const char ** tbx_names = tbx_seqnames(tbx_idx, &count); for (int i = 0; i < count; ++i) { root["tabix"]["chromosomes"].append(tbx_names[i]); } free(tbx_names); tbx_destroy(tbx_idx); } std::ofstream out(output.c_str()); out << writer.write(root); bcf_close(fp); bcf_hdr_destroy(hdr); } catch(std::runtime_error & e) { std::cerr << e.what() << std::endl; return 1; } catch(std::logic_error & e) { std::cerr << e.what() << std::endl; return 1; } return 0; }
/*
 * vcfview: read a VCF/BCF file and write it back as VCF or BCF, optionally
 * restricted to regions (extra positional arguments, BCF input only), to a
 * subset of samples, and with SNPs or INDELs excluded.
 *
 * Bits of `flag` used below: 1 = input is VCF (-S or -t), 2 = output is BCF
 * (-b or -l).  Bit 4 is tested but never set in this function, so the
 * output section always runs.  The option string also accepts -T, which has
 * no handler here.
 *
 * Returns 0 on success and 1 on usage or I/O errors.  All exits after
 * option parsing go through one cleanup path so that handles, headers and
 * sample lists are released on failures too.
 */
int main_vcfview(int argc, char *argv[])
{
    int i, c, clevel = -1, flag = 0, n_samples = -1, *imap = 0, excl_snp = 0, excl_indel = 0;
    int ret = 1;
    char *fn_ref = 0, *fn_out = 0, moder[8], **samples = 0;
    bcf_hdr_t *h = 0, *hsub = 0;
    htsFile *in = 0, *out = 0;
    bcf1_t *b = 0;

    while ((c = getopt(argc, argv, "l:bSt:o:T:s:GNI")) >= 0) {
        switch (c) {
        case 'l': clevel = atoi(optarg); flag |= 2; break;
        case 'S': flag |= 1; break;
        case 'b': flag |= 2; break;
        case 'G': n_samples = 0; break;
        case 't': fn_ref = optarg; flag |= 1; break;
        case 'o': fn_out = optarg; break;
        case 's': samples = hts_readlines(optarg, &n_samples); break;
        case 'N': excl_snp = 1; break;
        case 'I': excl_indel = 1; break;
        }
    }
    if (argc == optind) {
        fprintf(stderr, "\nUsage: vcfview [options] <in.bcf>|<in.vcf>|<in.vcf.gz>\n\n");
        fprintf(stderr, "Options: -b output in BCF\n");
        fprintf(stderr, " -S input is VCF\n");
        fprintf(stderr, " -o FILE output file name [stdout]\n");
        fprintf(stderr, " -l INT compression level [%d]\n", clevel);
        fprintf(stderr, " -t FILE list of reference names and lengths [null]\n");
        fprintf(stderr, " -s FILE/STR list of samples (STR if started with ':'; FILE otherwise) [null]\n");
        fprintf(stderr, " -G drop individual genotype information\n");
        fprintf(stderr, " -N exclude SNPs\n");
        fprintf(stderr, " -I exclude INDELs\n");
        fprintf(stderr, "\n");
        return 1;
    }

    // binary read mode unless the input was declared or detected as VCF
    strcpy(moder, "r");
    if ((flag&1) == 0 && !(file_type(argv[optind])&(IS_VCF|IS_VCF_GZ))) strcat(moder, "b");

    in = hts_open(argv[optind], moder, fn_ref);
    if (in == 0) {
        fprintf(stderr, "[E::%s] fail to open '%s'\n", __func__, argv[optind]);
        goto end;
    }
    h = vcf_hdr_read(in);
    if (h == 0) {
        fprintf(stderr, "[E::%s] fail to read the VCF/BCF2 header\n", __func__);
        goto end;
    }
    if (n_samples >= 0) {
        if (n_samples) imap = (int*)malloc(n_samples * sizeof(int));
        hsub = bcf_hdr_subset(h, n_samples, samples, imap);
    }
    b = bcf_init1();

    if ((flag&4) == 0) { // VCF/BCF output
        char modew[8];
        strcpy(modew, "w");
        if (clevel >= 0 && clevel <= 9) sprintf(modew + 1, "%d", clevel);
        if (flag&2) strcat(modew, "b");
        out = hts_open(fn_out? fn_out : "-", modew, 0);
        if (out == 0) {
            fprintf(stderr, "[E::%s] fail to open '%s' for writing\n", __func__, fn_out? fn_out : "-");
            goto end;
        }
        vcf_hdr_write(out, hsub? hsub : h);
        if (optind + 1 < argc && !(flag&1)) { // BCF input and has a region
            hts_idx_t *idx;
            if ((idx = bcf_index_load(argv[optind])) == 0) {
                fprintf(stderr, "[E::%s] fail to load the BCF index\n", __func__);
                goto end;
            }
            for (i = optind + 1; i < argc; ++i) {
                hts_itr_t *iter;
                if ((iter = bcf_itr_querys(idx, h, argv[i])) == 0) {
                    fprintf(stderr, "[E::%s] fail to parse region '%s'\n", __func__, argv[i]);
                    continue;
                }
                while (bcf_itr_next((BGZF*)in->fp, iter, b) >= 0) {
                    if (excl_snp && bcf_is_snp(b)) continue;
                    if (excl_indel && !bcf_is_snp(b)) continue;
                    if (n_samples >= 0) {
                        bcf_subset(h, b, n_samples, imap);
                        vcf_write1(out, hsub, b);
                    } else vcf_write1(out, h, b);
                }
                hts_itr_destroy(iter);
            }
            hts_idx_destroy(idx);
        } else {
            while (vcf_read1(in, h, b) >= 0) {
                if (excl_snp && bcf_is_snp(b)) continue;
                if (excl_indel && !bcf_is_snp(b)) continue;
                if (n_samples >= 0) {
                    bcf_subset(h, b, n_samples, imap);
                    vcf_write1(out, hsub, b);
                } else vcf_write1(out, h, b);
            }
        }
    }
    ret = 0;

end:
    if (out) hts_close(out);
    if (b) bcf_destroy1(b);
    if (samples) {
        for (i = 0; i < n_samples; ++i) free(samples[i]);
        free(samples);
    }
    // hsub also exists for -G (n_samples == 0), so release it whenever it was created
    if (hsub) bcf_hdr_destroy(hsub);
    free(imap);
    if (h) bcf_hdr_destroy(h);
    if (in) hts_close(in);
    return ret;
}
/*
 * Append a reader for `fname` to the synced reader set `files`.
 *
 * Only IS_VCF_GZ and IS_BCF files (as reported by file_type()) are accepted,
 * and both must have a loadable index.  The header is read, the PASS filter
 * id is looked up when files->apply_filters is set, and the list of
 * sequences is updated: with files->region set it is the sole entry,
 * otherwise every header sequence not yet listed is appended.
 *
 * `fname` is stored by pointer, not copied.  The sequence names stored in
 * files->seqs are the pointers returned by bcf_seqnames().
 *
 * Returns 1 on success and 0 on failure.  Note that the reader slot has
 * already been added to files->readers when a failure is reported.
 */
int bcf_sr_add_reader(readers_t *files, const char *fname)
{
    files->readers = (reader_t*) realloc(files->readers, sizeof(reader_t)*(files->nreaders+1));
    reader_t *reader = &files->readers[files->nreaders++];
    memset(reader,0,sizeof(reader_t));

    int type = file_type(fname);
    if ( type==IS_VCF_GZ )
    {
        reader->tbx = tbx_index_load(fname);
        if ( !reader->tbx )
        {
            fprintf(stderr,"[add_reader] Could not load the index of %s\n", fname);
            return 0;
        }

        // This is just to read the header
        htsFile *file = hts_open(fname, "r", NULL);
        if ( !file ) return 0;
        reader->header = vcf_hdr_read(file);
        hts_close(file);

        // The VCF opened in binary tabix mode
        reader->file = hts_open(fname, "rb", NULL);
        if ( !reader->file ) return 0;
    }
    else if ( type==IS_BCF )
    {
        reader->file = hts_open(fname, "rb", NULL);
        if ( !reader->file ) return 0;
        reader->header = vcf_hdr_read(reader->file);

        reader->bcf = bcf_index_load(fname);
        if ( !reader->bcf )
        {
            fprintf(stderr,"[add_reader] Could not load the index of %s\n", fname);
            return 0;   // not indexed..?
        }
    }
    else
    {
        fprintf(stderr,"Expected .vcf.gz or .bcf file\n");
        return 0;
    }

    // The header is dereferenced below (filter lookup, sequence names)
    if ( !reader->header )
    {
        fprintf(stderr,"[add_reader] Could not read the header of %s\n", fname);
        return 0;
    }

    reader->fname = fname;
    reader->filter_id = -1;
    if ( files->apply_filters )
        reader->filter_id = bcf_id2int(reader->header, BCF_DT_ID, "PASS");

    // Update list of chromosomes
    if ( files->region )
    {
        if ( !files->seqs )
        {
            files->mseqs = files->nseqs = 1;
            files->seqs = (const char**) malloc(sizeof(const char*));
            files->seqs[0] = files->region;
        }
    }
    else
    {
        int n,i,j;
        const char **names = bcf_seqnames(reader->header, &n);
        for (i=0; i<n; i++)
        {
            for (j=0; j<files->nseqs; j++)
                if ( !strcmp(names[i],files->seqs[j]) ) break;
            if ( j<files->nseqs ) continue;     // already have this chr
            files->mseqs += 30;
            files->seqs = (const char**) realloc(files->seqs, sizeof(const char*)*files->mseqs);
            files->seqs[files->nseqs++] = names[i];
        }
        free(names);
    }
    files->iseq = -1;
    return 1;
}
/*
 * Print to stdout the records of `fname` that fall in the given regions.
 *
 * args   options: targets_fname (optional file of target regions; records
 *        not overlapping any target are skipped), print_header, header_only
 * fname  indexed input file; BCF input goes through its BCF index, while
 *        VCF / SAM / unrecognised text formats go through a tabix index
 * regs   array of `nregs` region strings; the strings and the array itself
 *        are freed before returning
 *
 * Failures are reported through error(); presumably error() does not return
 * (verify against its definition).  Always returns 0.
 */
static int query_regions(args_t *args, char *fname, char **regs, int nregs)
{
    int i;
    htsFile *fp = hts_open(fname,"r");
    if ( !fp ) error("Could not read %s\n", fname);
    enum htsExactFormat format = hts_get_format(fp)->format;

    regidx_t *reg_idx = NULL;
    if ( args->targets_fname )
    {
        reg_idx = regidx_init(args->targets_fname, NULL, NULL, 0, NULL);
        if ( !reg_idx ) error("Could not read %s\n", args->targets_fname);
    }

    if ( format == bcf )
    {
        htsFile *out = hts_open("-","w");
        if ( !out ) error("Could not open stdout\n");
        hts_idx_t *idx = bcf_index_load(fname);
        if ( !idx ) error("Could not load .csi index of %s\n", fname);
        bcf_hdr_t *hdr = bcf_hdr_read(fp);
        if ( !hdr ) error("Could not read the header: %s\n", fname);
        if ( args->print_header && bcf_hdr_write(out,hdr)!=0 )
            error("Could not write the header to stdout\n");
        if ( !args->header_only )
        {
            bcf1_t *rec = bcf_init();
            for (i=0; i<nregs; i++)
            {
                hts_itr_t *itr = bcf_itr_querys(idx,hdr,regs[i]);
                if ( !itr ) continue;   // same handling as the text branch below
                while ( bcf_itr_next(fp, itr, rec) >=0 )
                {
                    if ( reg_idx && !regidx_overlap(reg_idx, bcf_seqname(hdr,rec),rec->pos,rec->pos+rec->rlen-1, NULL) ) continue;
                    if ( bcf_write(out,hdr,rec)!=0 ) error("Could not write to stdout\n");
                }
                tbx_itr_destroy(itr);
            }
            bcf_destroy(rec);
        }
        if ( hts_close(out) ) error("hts_close returned non-zero status for stdout\n");
        bcf_hdr_destroy(hdr);
        hts_idx_destroy(idx);
    }
    else if ( format==vcf || format==sam || format==unknown_format )
    {
        tbx_t *tbx = tbx_index_load(fname);
        if ( !tbx ) error("Could not load .tbi/.csi index of %s\n", fname);
        kstring_t str = {0,0,0};
        if ( args->print_header )
        {
            // header lines are the leading lines that start with the index's meta character
            while ( hts_getline(fp, KS_SEP_LINE, &str) >= 0 )
            {
                if ( !str.l || str.s[0]!=tbx->conf.meta_char ) break;
                puts(str.s);
            }
        }
        if ( !args->header_only )
        {
            int nseq = 0;
            const char **seq = NULL;
            // sequence names are only needed to look records up in the targets index
            if ( reg_idx ) seq = tbx_seqnames(tbx, &nseq);
            for (i=0; i<nregs; i++)
            {
                hts_itr_t *itr = tbx_itr_querys(tbx, regs[i]);
                if ( !itr ) continue;
                while (tbx_itr_next(fp, tbx, itr, &str) >= 0)
                {
                    if ( reg_idx && !regidx_overlap(reg_idx,seq[itr->curr_tid],itr->curr_beg,itr->curr_end, NULL) ) continue;
                    puts(str.s);
                }
                tbx_itr_destroy(itr);
            }
            free(seq);
        }
        free(str.s);
        tbx_destroy(tbx);
    }
    else if ( format==bam )
        error("Please use \"samtools view\" for querying BAM files.\n");

    if ( reg_idx ) regidx_destroy(reg_idx);
    if ( hts_close(fp) ) error("hts_close returned non-zero status: %s\n", fname);

    for (i=0; i<nregs; i++) free(regs[i]);
    free(regs);
    return 0;
}
/*
 * Open `fname` and append it as a new reader to the synced reader set.
 *
 * With files->require_index set the file must be a bgzf-compressed VCF
 * (tabix index) or BCF (BCF index).  Without it any VCF/BCF is accepted and
 * the set switches to streaming mode, which allows a single reader only and
 * no regions.
 *
 * Unless regions were given explicitly or the set is streaming, the
 * sequence names of the new reader (from the tabix index when there is one,
 * otherwise from the header) are added to files->regions.
 *
 * `fname` is stored by pointer, not copied.
 *
 * Returns 1 on success.  On failure returns 0 with files->errnum set; apart
 * from a failed open, the reader slot has already been appended by then.
 */
int bcf_sr_add_reader(bcf_srs_t *files, const char *fname)
{
    htsFile *handle = hts_open(fname, "r");
    if ( handle == NULL )
    {
        files->errnum = open_failed;
        return 0;
    }

    /* grow the per-reader arrays by one slot and claim it */
    files->has_line = (int*) realloc(files->has_line, sizeof(int)*(files->nreaders+1));
    files->has_line[files->nreaders] = 0;
    files->readers  = (bcf_sr_t*) realloc(files->readers, sizeof(bcf_sr_t)*(files->nreaders+1));
    bcf_sr_t *reader = &files->readers[files->nreaders++];
    memset(reader,0,sizeof(bcf_sr_t));

    reader->file  = handle;
    files->errnum = 0;

    const int is_vcf = reader->file->format.format==vcf;
    const int is_bcf = reader->file->format.format==bcf;

    /* both modes reject anything that is not VCF/BCF */
    if ( !is_vcf && !is_bcf )
    {
        files->errnum = file_type_error;
        return 0;
    }

    if ( !files->require_index )
    {
        reader->header = bcf_hdr_read(reader->file);
        files->streaming = 1;
    }
    else
    {
        if ( reader->file->format.compression!=bgzf )
        {
            files->errnum = not_bgzf;
            return 0;
        }

        if ( is_vcf )
        {
            reader->tbx_idx = tbx_index_load(fname);
            if ( reader->tbx_idx == NULL )
            {
                files->errnum = idx_load_failed;
                return 0;
            }
            reader->header = bcf_hdr_read(reader->file);
        }
        else
        {
            reader->header  = bcf_hdr_read(reader->file);
            reader->bcf_idx = bcf_index_load(fname);
            if ( reader->bcf_idx == NULL )
            {
                files->errnum = idx_load_failed;
                return 0;
            }
        }
    }

    if ( files->streaming )
    {
        if ( files->nreaders>1 )
        {
            files->errnum = api_usage_error;
            fprintf(stderr,"[%s:%d %s] Error: %d readers, yet require_index not set\n", __FILE__,__LINE__,__FUNCTION__,files->nreaders);
            return 0;
        }
        if ( files->regions )
        {
            files->errnum = api_usage_error;
            fprintf(stderr,"[%s:%d %s] Error: cannot tabix-jump in streaming mode\n", __FILE__,__LINE__,__FUNCTION__);
            return 0;
        }
    }

    if ( reader->header == NULL )
    {
        files->errnum = header_error;
        return 0;
    }

    reader->fname = fname;
    if ( files->apply_filters )
        reader->filter_ids = init_filters(reader->header, files->apply_filters, &reader->nfilter_ids);

    /* Update list of chromosomes */
    if ( files->explicit_regs || files->streaming ) return 1;

    int n_names, k;
    const char **seq_names;
    if ( reader->tbx_idx )
        seq_names = tbx_seqnames(reader->tbx_idx, &n_names);
    else
        seq_names = bcf_hdr_seqnames(reader->header, &n_names);

    for (k = 0; k < n_names; ++k)
    {
        if ( files->regions )
            _regions_add(files->regions, seq_names[k], -1, -1);
        else
            files->regions = _regions_init_string(seq_names[k]);
    }
    free(seq_names);
    return 1;
}
/*
 * Open `fname` and append it as a new reader to the synced reader set.
 *
 * The reader type is derived from the opened file: FT_BCF or FT_VCF
 * depending on is_bin, with FT_GZ added when is_compressed is set.
 *
 * With files->require_index set only FT_VCF_GZ (tabix index) and FT_BCF_GZ
 * (BCF index) are accepted.  Without it the header is read directly and the
 * set switches to streaming mode, which allows a single reader only and no
 * regions.
 *
 * Unless regions were given explicitly or the set is streaming, the
 * sequence names of the new reader (from the tabix index when there is one,
 * otherwise from the header) are added to files->regions.
 *
 * `fname` is stored by pointer, not copied.
 *
 * Returns 1 on success and 0 on failure; the reader slot has already been
 * appended to files->readers when a failure is reported.
 */
int bcf_sr_add_reader(bcf_srs_t *files, const char *fname)
{
    /* grow the per-reader arrays by one slot and claim it */
    files->has_line = (int*) realloc(files->has_line, sizeof(int)*(files->nreaders+1));
    files->has_line[files->nreaders] = 0;
    files->readers  = (bcf_sr_t*) realloc(files->readers, sizeof(bcf_sr_t)*(files->nreaders+1));
    bcf_sr_t *reader = &files->readers[files->nreaders++];
    memset(reader,0,sizeof(bcf_sr_t));

    reader->file = hts_open(fname, "r");
    if ( reader->file == NULL ) return 0;

    reader->type = reader->file->is_bin ? FT_BCF : FT_VCF;
    if ( reader->file->is_compressed )
        reader->type |= FT_GZ;

    if ( !files->require_index )
    {
        if ( !(reader->type & FT_BCF) && !(reader->type & FT_VCF) )
        {
            fprintf(stderr,"File type not recognised: %s\n", fname);
            return 0;
        }
        reader->header = bcf_hdr_read(reader->file);
        files->streaming = 1;
    }
    else if ( reader->type==FT_VCF_GZ )
    {
        reader->tbx_idx = tbx_index_load(fname);
        if ( reader->tbx_idx == NULL )
        {
            fprintf(stderr,"[add_reader] Could not load the index of %s\n", fname);
            return 0;
        }
        reader->header = bcf_hdr_read(reader->file);
    }
    else if ( reader->type==FT_BCF_GZ )
    {
        reader->header  = bcf_hdr_read(reader->file);
        reader->bcf_idx = bcf_index_load(fname);
        if ( reader->bcf_idx == NULL )
        {
            fprintf(stderr,"[add_reader] Could not load the index of %s\n", fname);
            return 0;   /* not indexed..? */
        }
    }
    else
    {
        fprintf(stderr,"Index required, expected .vcf.gz or .bcf file: %s\n", fname);
        return 0;
    }

    if ( files->streaming )
    {
        if ( files->nreaders>1 )
        {
            fprintf(stderr,"[%s:%d %s] Error: %d readers, yet require_index not set\n", __FILE__,__LINE__,__FUNCTION__,files->nreaders);
            return 0;
        }
        if ( files->regions )
        {
            fprintf(stderr,"[%s:%d %s] Error: cannot tabix-jump in streaming mode\n", __FILE__,__LINE__,__FUNCTION__);
            return 0;
        }
    }

    if ( reader->header == NULL ) return 0;

    reader->fname = fname;
    if ( files->apply_filters )
        reader->filter_ids = init_filters(reader->header, files->apply_filters, &reader->nfilter_ids);

    /* Update list of chromosomes */
    if ( files->explicit_regs || files->streaming ) return 1;

    int n_names, k;
    const char **seq_names;
    if ( reader->tbx_idx )
        seq_names = tbx_seqnames(reader->tbx_idx, &n_names);
    else
        seq_names = bcf_hdr_seqnames(reader->header, &n_names);

    for (k = 0; k < n_names; ++k)
    {
        if ( files->regions )
            _regions_add(files->regions, seq_names[k], -1, -1);
        else
            files->regions = _regions_init_string(seq_names[k]);
    }
    free(seq_names);
    return 1;
}
/**
 * Opens a VCF/BCF file for ordered reading and, where possible, loads its
 * index so that the given intervals can be accessed randomly.
 *
 * A file name of "+" is opened as "-".  The process exits with status 1 if
 * the file cannot be opened, is not VCF/BCF, or its header cannot be read.
 * A missing index is not fatal: a message is printed when intervals were
 * requested, and random access stays disabled.
 *
 * @param file_name  name of the input file
 * @param intervals  intervals to read; copied into the reader
 */
BCFOrderedReader::BCFOrderedReader(std::string file_name, std::vector<GenomeInterval>& intervals)
{
    if (file_name=="+")
    {
        this->file_name = "-";
    }
    else
    {
        this->file_name = file_name;
    }

    // start from a clean state
    file = NULL;
    hdr = NULL;
    idx = NULL;
    tbx = NULL;
    itr = NULL;
    index_loaded = false;
    interval_index = 0;
    this->intervals = intervals;

    file = hts_open(this->file_name.c_str(), "r");
    if (file==NULL)
    {
        fprintf(stderr, "[%s:%d %s] Cannot open %s\n", __FILE__, __LINE__, __FUNCTION__, file_name.c_str());
        exit(1);
    }

    ftype = file->format;
    const bool is_bcf = ftype.format==bcf;
    const bool is_vcf = ftype.format==vcf;
    if (!is_vcf && !is_bcf)
    {
        fprintf(stderr, "[%s:%d %s] Not a VCF/BCF file: %s\n", __FILE__, __LINE__, __FUNCTION__, file_name.c_str());
        exit(1);
    }

    s = {0, 0, 0};

    hdr = bcf_alt_hdr_read(file);
    if (hdr==NULL)
    {
        exit(1);
    }

    intervals_present = !intervals.empty();

    if (is_bcf)
    {
        idx = bcf_index_load(file_name.c_str());
        index_loaded = (idx!=NULL);
        if (!index_loaded && intervals_present)
        {
            fprintf(stderr, "[E:%s] index cannot be loaded for %s for random access, ignoring specified intervals and reading from start.\n", __FUNCTION__, file_name.c_str());
        }
    }
    else if (ftype.compression==bgzf)
    {
        // bgzf-compressed VCF
        tbx = tbx_index_load(file_name.c_str());
        index_loaded = (tbx!=NULL);
        if (!index_loaded && intervals_present)
        {
            fprintf(stderr, "[E:%s] index cannot be loaded for %s for random access, ignoring specified intervals and reading from start.\n", __FUNCTION__, file_name.c_str());
        }
    }
    else if (intervals_present)
    {
        // VCF without bgzf compression
        fprintf(stderr, "[E:%s] no random access support for VCF file: %s\n", __FUNCTION__, file_name.c_str());
    }

    random_access_enabled = intervals_present && index_loaded;
}
/* Return a newly malloc'ed copy of `base` with `ext` appended, or NULL. */
static char *convert_default_name(const char *base, const char *ext)
{
    char *name = (char*)malloc(strlen(base) + strlen(ext) + 1);
    if (name != NULL) {
        strcpy(name, base);
        strcat(name, ext);
    }
    return name;
}

/*
 * "convert" sub-command.  argv[0] selects the conversion type:
 *
 *   bcf  index a BCF/VCF file via bcf_wahbm().  When -f (number of fields)
 *        or -r (number of records) is missing, BOTH values are taken from
 *        the file: fields from the header's sample count, records from the
 *        per-sequence counts stored in the TBI/CSI index.  Missing output
 *        names default to <in>.gqt, <in>.bim, <in>.vid and "./" for the
 *        temporary directory.
 *   ped  build a sample database via ped_ped(); the output defaults to
 *        <ped>.db when -p is given and <in>.db otherwise.
 *
 * Options: -i input, -o output, -b bim file, -v vid file, -t tmp dir,
 *          -p ped file, -c column (default 2), -f fields, -r records,
 *          -h help.
 *
 * Returns the status of the conversion routine, 1 on option or I/O errors,
 * or the result of convert_help() when the arguments are incomplete.
 */
int convert(int argc, char **argv)
{
    if (argc < 2) return convert_help();

    int c;
    char *in=NULL, *out=NULL, *bim=NULL, *vid=NULL, *tmp_dir=NULL, *ped=NULL;
    uint32_t num_fields = 0, num_records = 0, col = 2;
    int i_is_set = 0,
        o_is_set = 0,
        f_is_set = 0,
        b_is_set = 0,
        v_is_set = 0,
        t_is_set = 0,
        p_is_set = 0,
        r_is_set = 0;

    while((c = getopt (argc, argv, "hi:o:f:r:b:v:t:p:c:")) != -1) {
        switch (c) {
        case 'c':
            col = atoi(optarg);
            break;
        case 'p':
            p_is_set = 1;
            ped = optarg;
            break;
        case 't':
            t_is_set = 1;
            tmp_dir = optarg;
            break;
        case 'v':
            v_is_set = 1;
            vid = optarg;
            break;
        case 'b':
            b_is_set = 1;
            bim = optarg;
            break;
        case 'i':
            i_is_set = 1;
            in = optarg;
            break;
        case 'o':
            o_is_set = 1;
            out = optarg;
            break;
        case 'f':
            f_is_set = 1;
            num_fields = atoi(optarg);
            break;
        case 'r':
            r_is_set = 1;
            num_records = atoi(optarg);
            break;
        case 'h':
            convert_help();
            return 1;
        case '?':
            // every option that takes an argument is listed here
            if ( (optopt == 'i') ||
                 (optopt == 'o') ||
                 (optopt == 'f') ||
                 (optopt == 'r') ||
                 (optopt == 'b') ||
                 (optopt == 'v') ||
                 (optopt == 't') ||
                 (optopt == 'p') ||
                 (optopt == 'c') )
                fprintf (stderr, "Option -%c requires an argument.\n", optopt);
            else if (isprint (optopt))
                fprintf (stderr, "Unknown option `-%c'.\n", optopt);
            else
                fprintf (stderr, "Unknown option character `\\x%x'.\n", optopt);
            /* fall through: show the help and fail */
        default:
            convert_help();
            return 1;
        }
    }

    char *type = argv[0];

    if (i_is_set == 0) {
        printf("Input file is not set\n");
        return convert_help();
    }

    if (strcmp(type, "bcf") == 0) {
        if ( (f_is_set == 0) || (r_is_set == 0) ) {
            fprintf(stderr,"Attempting to autodetect num of records "
                    "and fields from %s\n", in);

            //Try and auto detect the sizes, need the index
            tbx_t *tbx = NULL;
            hts_idx_t *idx = NULL;
            htsFile *fp = hts_open(in,"rb");
            if ( !fp ) {
                fprintf(stderr,"Could not read %s\n", in);
                return 1;
            }

            bcf_hdr_t *hdr = bcf_hdr_read(fp);
            if ( !hdr ) {
                fprintf(stderr,"Could not read the header: %s\n", in);
                hts_close(fp);
                return 1;
            }

            if (hts_get_format(fp)->format==vcf) {
                tbx = tbx_index_load(in);
                if ( !tbx ) {
                    fprintf(stderr,"Could not load TBI index: %s\n", in);
                    bcf_hdr_destroy(hdr);
                    hts_close(fp);
                    return 1;
                }
            } else if ( hts_get_format(fp)->format==bcf ) {
                idx = bcf_index_load(in);
                if ( !idx ) {
                    fprintf(stderr,"Could not load CSI index: %s\n", in);
                    bcf_hdr_destroy(hdr);
                    hts_close(fp);
                    return 1;
                }
            } else {
                fprintf(stderr,
                        "Could not detect the file type as VCF or BCF: %s\n",
                        in);
                bcf_hdr_destroy(hdr);
                hts_close(fp);
                return 1;
            }

            num_fields = hdr->n[BCF_DT_SAMPLE];

            // total records = sum of the per-sequence counts kept in the index
            num_records = 0;
            const char **seq;
            int nseq;
            seq = tbx ? tbx_seqnames(tbx, &nseq) :
                        bcf_index_seqnames(idx, hdr, &nseq);
            int i;
            for (i = 0; i < nseq; ++i) {
                uint64_t records, v;
                hts_idx_get_stat(tbx ? tbx->idx: idx, i, &records, &v);
                num_records += records;
            }

            fprintf(stderr,
                    "Number of records:%u\tNumber of fields:%u\n",
                    num_records, num_fields);
            free(seq);
            hts_close(fp);
            bcf_hdr_destroy(hdr);
            if (idx)
                hts_idx_destroy(idx);
            if (tbx)
                tbx_destroy(tbx);
        }

        // Names not given on the command line are derived from the input
        // name; the *_buf pointers track what has to be freed afterwards.
        char *out_buf = NULL, *bim_buf = NULL, *vid_buf = NULL, *tmp_buf = NULL;
        int alloc_failed = 0;

        if (o_is_set == 0) {
            out = out_buf = convert_default_name(in, ".gqt");
            if (out == NULL) alloc_failed = 1;
        }
        if (b_is_set == 0) {
            bim = bim_buf = convert_default_name(in, ".bim");
            if (bim == NULL) alloc_failed = 1;
        }
        if (v_is_set == 0) {
            vid = vid_buf = convert_default_name(in, ".vid");
            if (vid == NULL) alloc_failed = 1;
        }
        if (t_is_set == 0) {
            tmp_dir = tmp_buf = convert_default_name("./", "");
            if (tmp_dir == NULL) alloc_failed = 1;
        }

        int r = 1;
        if (alloc_failed)
            fprintf(stderr, "Could not allocate memory for file names\n");
        else
            r = bcf_wahbm(in, out, bim, vid, tmp_dir, num_fields, num_records);

        free(out_buf);
        free(bim_buf);
        free(vid_buf);
        free(tmp_buf);
        return r;
    }

    if (strcmp(type, "ped") == 0) {
        char *out_buf = NULL;
        if (o_is_set == 0) {
            out = out_buf = convert_default_name(p_is_set == 1 ? ped : in, ".db");
            if (out == NULL) {
                fprintf(stderr, "Could not allocate memory for file names\n");
                return 1;
            }
        }

        fprintf(stderr, "Creating sample database %s\n", out);
        int r = ped_ped(in, ped, col, out);
        free(out_buf);
        return r;
    }

    return convert_help();
}