static int query_chroms(char *fname) { const char **seq; int i, nseq, ftype = file_type(fname); if ( ftype & IS_TXT || !ftype ) { tbx_t *tbx = tbx_index_load(fname); if ( !tbx ) error("Could not load .tbi index of %s\n", fname); seq = tbx_seqnames(tbx, &nseq); for (i=0; i<nseq; i++) printf("%s\n", seq[i]); free(seq); tbx_destroy(tbx); } else if ( ftype==IS_BCF ) { htsFile *fp = hts_open(fname,"r"); if ( !fp ) error("Could not read %s\n", fname); bcf_hdr_t *hdr = bcf_hdr_read(fp); if ( !hdr ) error("Could not read the header: %s\n", fname); hts_close(fp); hts_idx_t *idx = bcf_index_load(fname); if ( !idx ) error("Could not load .csi index of %s\n", fname); seq = bcf_index_seqnames(idx, hdr, &nseq); for (i=0; i<nseq; i++) printf("%s\n", seq[i]); free(seq); bcf_hdr_destroy(hdr); hts_idx_destroy(idx); } else if ( ftype==IS_BAM ) // todo: BAM error("BAM: todo\n"); return 0; }
int vcf_index_stats(char *fname, int stats) { char *fn_out = NULL; FILE *out; out = fn_out ? fopen(fn_out, "w") : stdout; const char **seq; int i, nseq; tbx_t *tbx = NULL; hts_idx_t *idx = NULL; htsFile *fp = hts_open(fname,"r"); if ( !fp ) { fprintf(stderr,"Could not read %s\n", fname); return 1; } bcf_hdr_t *hdr = bcf_hdr_read(fp); if ( !hdr ) { fprintf(stderr,"Could not read the header: %s\n", fname); return 1; } if ( hts_get_format(fp)->format==vcf ) { tbx = tbx_index_load(fname); if ( !tbx ) { fprintf(stderr,"Could not load TBI index: %s\n", fname); return 1; } } else if ( hts_get_format(fp)->format==bcf ) { idx = bcf_index_load(fname); if ( !idx ) { fprintf(stderr,"Could not load CSI index: %s\n", fname); return 1; } } else { fprintf(stderr,"Could not detect the file type as VCF or BCF: %s\n", fname); return 1; } seq = tbx ? tbx_seqnames(tbx, &nseq) : bcf_index_seqnames(idx, hdr, &nseq); uint64_t sum = 0; for (i=0; i<nseq; i++) { uint64_t records, v; hts_idx_get_stat(tbx ? tbx->idx : idx, i, &records, &v); sum+=records; if (stats&2 || !records) continue; bcf_hrec_t *hrec = bcf_hdr_get_hrec(hdr, BCF_HL_CTG, "ID", seq[i], NULL); int hkey = hrec ? bcf_hrec_find_key(hrec, "length") : -1; fprintf(out,"%s\t%s\t%" PRIu64 "\n", seq[i], hkey<0?".":hrec->vals[hkey], records); } if (!sum) { // No counts found. // Is this because index version has no stored count data, or no records? bcf1_t *rec = bcf_init1(); if (bcf_read1(fp, hdr, rec) >= 0) { fprintf(stderr,"%s index of %s does not contain any count metadata. Please re-index with a newer version of bcftools or tabix.\n", tbx ? "TBI" : "CSI", fname); return 1; } bcf_destroy1(rec); } if (stats&2) fprintf(out, "%" PRIu64 "\n", sum); free(seq); fclose(out); hts_close(fp); bcf_hdr_destroy(hdr); if (tbx) tbx_destroy(tbx); if (idx) hts_idx_destroy(idx); return 0; }
int main(int argc, char* argv[]) { namespace po = boost::program_options; std::string file; std::string output; try { // Declare the supported options. po::options_description desc("Allowed options"); desc.add_options() ("help,h", "produce help message") ("version", "Show version") ("input-file", po::value< std::string >(), "The input files") ("output-file", po::value<std::string>(), "The output file name.") ; po::positional_options_description popts; popts.add("input-file", 1); popts.add("output-file", 1); po::options_description cmdline_options; cmdline_options .add(desc) ; po::variables_map vm; po::store(po::command_line_parser(argc, argv). options(cmdline_options).positional(popts).run(), vm); po::notify(vm); if (vm.count("version")) { std::cout << "vcfhdr2json version " << HAPLOTYPES_VERSION << "\n"; return 0; } if (vm.count("help")) { std::cout << desc << "\n"; return 1; } if (vm.count("input-file")) { file = vm["input-file"].as< std::string > (); } if (vm.count("output-file")) { output = vm["output-file"].as< std::string >(); } if(file.size() == 0) { std::cerr << "Please specify an input file.\n"; return 1; } if (output == "") { std::cerr << "Please specify an output file.\n"; return 1; } } catch (po::error & e) { std::cerr << e.what() << "\n"; return 1; } try { Json::StyledWriter writer; htsFile * fp = bcf_open(file.c_str(), "r"); bcf_hdr_t * hdr = bcf_hdr_read(fp); Json::Value root; Json::Value a; for (int i = 0; i < bcf_hdr_nsamples(hdr); ++i) { a.append(hdr->samples[i]); } root["samples"] = a; Json::Value fields; for (int i = 0; i < hdr->nhrec; i++) { Json::Value field; field["key"] = hdr->hrec[i]->key; if (!hdr->hrec[i]->value) { Json::Value values; for (int j = 0; j < hdr->hrec[i]->nkeys; j++) { values[hdr->hrec[i]->keys[j]] = hdr->hrec[i]->vals[j]; } field["values"] = values; } else { field["value"] = hdr->hrec[i]->value; } fields.append(field); } root["fields"] = fields; tbx_t * tbx_idx = tbx_index_load(file.c_str()); if ( !tbx_idx ) { hts_idx_t * csi_idx = bcf_index_load(file.c_str()); if(!csi_idx) { root["tabix"] = Json::Value::null; } else { root["tabix"] = Json::Value(); root["tabix"]["chromosomes"] = Json::Value(); int count = 0; const char ** tbx_names = bcf_index_seqnames(csi_idx, hdr, &count); for (int i = 0; i < count; ++i) { root["tabix"]["chromosomes"].append(tbx_names[i]); } free(tbx_names); hts_idx_destroy(csi_idx); } } else { root["tabix"] = Json::Value(); root["tabix"]["chromosomes"] = Json::Value(); int count = 0; const char ** tbx_names = tbx_seqnames(tbx_idx, &count); for (int i = 0; i < count; ++i) { root["tabix"]["chromosomes"].append(tbx_names[i]); } free(tbx_names); tbx_destroy(tbx_idx); } std::ofstream out(output.c_str()); out << writer.write(root); bcf_close(fp); bcf_hdr_destroy(hdr); } catch(std::runtime_error & e) { std::cerr << e.what() << std::endl; return 1; } catch(std::logic_error & e) { std::cerr << e.what() << std::endl; return 1; } return 0; }
int convert(int argc, char **argv) { if (argc < 2) return convert_help(); int c; char *in=NULL, *out=NULL, *bim=NULL, *vid=NULL, *tmp_dir=NULL, *ped=NULL; uint32_t num_fields, num_records, col = 2; int i_is_set = 0, o_is_set = 0, f_is_set = 0, b_is_set = 0, v_is_set = 0, t_is_set = 0, p_is_set = 0, r_is_set = 0; while((c = getopt (argc, argv, "hi:o:f:r:b:v:t:p:c:")) != -1) { switch (c) { case 'c': col = atoi(optarg); break; case 'p': p_is_set = 1; ped = optarg; break; case 't': t_is_set = 1; tmp_dir = optarg; break; case 'v': v_is_set = 1; vid = optarg; break; case 'b': b_is_set = 1; bim = optarg; break; case 'i': i_is_set = 1; in = optarg; break; case 'o': o_is_set = 1; out = optarg; break; case 'f': f_is_set = 1; num_fields = atoi(optarg); break; case 'r': r_is_set = 1; num_records = atoi(optarg); break; case 'h': convert_help(); return 1; case '?': if ( (optopt == 'i') || (optopt == 'f') || (optopt == 'r') || (optopt == 't') || (optopt == 's') || (optopt == 'p') || (optopt == 'c') || (optopt == 'o') ) fprintf (stderr, "Option -%c requires an argument.\n", optopt); else if (isprint (optopt)) fprintf (stderr, "Unknown option `-%c'.\n", optopt); else fprintf (stderr, "Unknown option character `\\x%x'.\n", optopt); default: convert_help(); return 1; } } char *type = argv[0]; if (i_is_set == 0) { printf("Input file is not set\n"); return convert_help(); } if (strcmp(type, "bcf") == 0) { if ( (f_is_set == 0) || (r_is_set == 0) ) { fprintf(stderr,"Attempting to autodetect num of records " "and fields from %s\n", in); //Try and auto detect the sizes, need the index tbx_t *tbx = NULL; hts_idx_t *idx = NULL; htsFile *fp = hts_open(in,"rb"); if ( !fp ) { fprintf(stderr,"Could not read %s\n", in); return 1; } bcf_hdr_t *hdr = bcf_hdr_read(fp); if ( !hdr ) { fprintf(stderr,"Could not read the header: %s\n", in); return 1; } if (hts_get_format(fp)->format==vcf) { tbx = tbx_index_load(in); if ( !tbx ) { fprintf(stderr,"Could not load TBI index: %s\n", in); return 1; } } else if ( hts_get_format(fp)->format==bcf ) { idx = bcf_index_load(in); if ( !idx ) { fprintf(stderr,"Could not load CSI index: %s\n", in); return 1; } } else { fprintf(stderr, "Could not detect the file type as VCF or BCF: %s\n", in); return 1; } num_fields = hdr->n[BCF_DT_SAMPLE]; num_records = 0; const char **seq; int nseq; seq = tbx ? tbx_seqnames(tbx, &nseq) : bcf_index_seqnames(idx, hdr, &nseq); int i; uint32_t sum = 0; for (i = 0; i < nseq; ++i) { uint64_t records, v; hts_idx_get_stat(tbx ? tbx->idx: idx, i, &records, &v); num_records += records; } fprintf(stderr, "Number of records:%u\tNumber of fields:%u\n", num_records, num_fields); free(seq); hts_close(fp); bcf_hdr_destroy(hdr); if (idx) hts_idx_destroy(idx); if (tbx) tbx_destroy(tbx); } if (o_is_set == 0) { out = (char*)malloc(strlen(in) + 5); // 5 for ext and \0 strcpy(out,in); strcat(out, ".gqt"); } if (b_is_set == 0) { bim = (char*)malloc(strlen(in) + 5); // 5 for ext and \0 strcpy(bim,in); strcat(bim, ".bim"); } if (v_is_set == 0) { vid = (char*)malloc(strlen(in) + 5); // 5 for ext and \0 strcpy(vid,in); strcat(vid, ".vid"); } if (t_is_set == 0) { tmp_dir = (char*)malloc(3*sizeof(char)); // "./\0" strcpy(tmp_dir,"./"); } int r = bcf_wahbm(in, out, bim, vid, tmp_dir, num_fields, num_records); return r; } if (strcmp(type, "ped") == 0) { if (o_is_set == 0) { if (p_is_set == 1) { out = (char*)malloc(strlen(ped) + 4); // 4 for ext and \0 strcpy(out,ped); strcat(out, ".db"); } else { out = (char*)malloc(strlen(in) + 4); // 4 for ext and \0 strcpy(out,in); strcat(out, ".db"); } } fprintf(stderr, "Creating sample database %s\n", out); return ped_ped(in, ped, col, out); } return convert_help(); }