/*
 * Destroy a reader set and release the memory it owns.
 *
 * For every reader this releases the tabix/BCF index (when present), the
 * header, the file handle, the iterator (when present), the record buffer
 * and the sample list. It then frees the shared arrays (readers, seqs,
 * samples), the optional targets structure, the scratch string and finally
 * `files` itself.
 *
 * When no reader was ever added only the `files` structure itself is
 * released; the remaining members are not touched in that case.
 */
void bcf_sr_destroy(readers_t *files)
{
    if ( !files->nreaders )
    {
        // The structure used to be leaked on this path.
        free(files);
        return;
    }

    int i;
    for (i=0; i<files->nreaders; i++)
    {
        reader_t *reader = &files->readers[i];
        if ( reader->tbx ) tbx_destroy(reader->tbx);
        if ( reader->bcf ) hts_idx_destroy(reader->bcf);
        bcf_hdr_destroy(reader->header);
        hts_close(reader->file);
        if ( reader->itr ) tbx_itr_destroy(reader->itr);

        int j;
        for (j=0; j<reader->mbuffer; j++)
            bcf_destroy1(reader->buffer[j]);
        free(reader->buffer);
        if ( reader->samples ) free(reader->samples);
    }
    free(files->readers);
    free(files->seqs);

    for (i=0; i<files->n_smpl; i++)
        free(files->samples[i]);
    free(files->samples);

    if (files->targets)
    {
        if (files->targets->itr) tbx_itr_destroy(files->targets->itr);
        tbx_destroy(files->targets->tbx);
        if (files->targets->line.m) free(files->targets->line.s);
        hts_close(files->targets->file);
        free(files->targets->seq_names);
        free(files->targets);
    }

    if ( files->tmps.m ) free(files->tmps.s);
    free(files);
}
int main_vcfview(int argc, char *argv[]) { int c, clevel = -1, in_type = FT_BCF, out_type = FT_VCF; char *fname_out = NULL, moder[8], modew[8]; while ((c = getopt(argc, argv, "l:bvo:n:z?hu")) >= 0) { switch (c) { case 'o': switch (optarg[0]) { case 'b': out_type = FT_BCF_GZ; break; case 'u': out_type = FT_BCF; break; case 'z': out_type = FT_VCF_GZ; break; case 'v': out_type = FT_VCF; break; default: error("The output type \"%s\" not recognised\n", optarg); } break; case 'l': clevel = atoi(optarg); out_type |= FT_GZ; break; case 'v': in_type = FT_VCF; break; case 'b': out_type = FT_BCF_GZ; break; case 'u': out_type = FT_BCF; break; case 'z': out_type = FT_VCF_GZ; break; case 'n': fname_out = optarg; break; case '?': case 'h': usage(); return 1; break; } } if (argc!=optind+1) { usage(); return 1; } // Init reader strcpy(moder, "r"); if ( (!strcmp("-",argv[optind]) && (in_type & FT_BCF)) || (hts_file_type(argv[optind]) & FT_BCF)) strcat(moder, "b"); htsFile *fp_in = hts_open(argv[optind], moder, NULL); if ( !fp_in ) error("Fail to open: %s\n", argv[optind]); bcf_hdr_t *hdr = vcf_hdr_read(fp_in); if ( !hdr ) error("Fail to read VCF/BCF header: %s\n", argv[optind]); bcf1_t *rec = bcf_init1(); // Init writer strcpy(modew, "w"); if (clevel >= 0 && clevel <= 9) sprintf(modew + 1, "%d", clevel); if (out_type & FT_GZ) strcat(modew,"z"); if (out_type & FT_BCF) strcat(modew, "b"); if (out_type == FT_BCF) strcat(modew, "u"); // uncompressed BCF output htsFile *fp_out = hts_open(fname_out ? fname_out : "-", modew, NULL); vcf_hdr_write(fp_out, hdr); while ( vcf_read1(fp_in, hdr, rec) >= 0) vcf_write1(fp_out, hdr, rec); bcf_destroy1(rec); bcf_hdr_destroy(hdr); hts_close(fp_in); hts_close(fp_out); return 0; }
/** * Parse multiple files from command line unlabeled arguments or -L denoted file list. If both are defined, the files are merged. * * @files - file names are stored in this vector * @argument_files - vector of input files * @file_list - file names stored in a file * */ void Program::parse_files(std::vector<std::string>& files, const std::vector<std::string>& arg_files, std::string file_list) { files.clear(); if (arg_files.size()!=0) { files = arg_files; } if (file_list != "") { htsFile *file = hts_open(file_list.c_str(), "r"); if (file==NULL) { std::cerr << "cannot open " << file_list << "\n"; exit(1); } kstring_t *s = &file->line; while (hts_getline(file, '\n', s) >= 0) { if (s->s[0]!='#') { files.push_back(std::string(s->s)); } } hts_close(file); } }
/**
 * Reads header of a VCF file and returns the bcf header object.
 * This wraps around bcf_hdr_read to allow for an alternative header
 * file to be read in.
 *
 * This searches for the alternative header saved as <filename>.hdr.
 * When that file exists and can be opened, the header is taken from it and
 * the header embedded in fp is read and discarded so that fp is positioned
 * after its own header. Otherwise the header embedded in fp is returned.
 *
 * @fp - open file whose header is wanted
 * @return the header object as produced by bcf_hdr_read (may be NULL)
 */
bcf_hdr_t *bcf_alt_hdr_read(htsFile *fp)
{
    bcf_hdr_t *h = NULL;

    // name of the alternative header: "<filename>.hdr"
    kstring_t alt_hdr_fn = {0, 0, 0};
    kputs(fp->fn, &alt_hdr_fn);
    kputs(".hdr", &alt_hdr_fn);

    // check for existence of alternative header, then open it through htslib
    htsFile *alt_hdr = NULL;
    FILE *file = fopen(alt_hdr_fn.s, "r");
    if (file)
    {
        fclose(file);
        alt_hdr = hts_open(alt_hdr_fn.s, "r");
    }

    if (!alt_hdr)
    {
        // no usable alternative header: fall back to the embedded one
        h = bcf_hdr_read(fp);
    }
    else
    {
        fprintf(stderr, "[I:%s:%d %s] read alternative header for %s\n", __FILE__, __LINE__, __FUNCTION__, fp->fn);
        h = bcf_hdr_read(alt_hdr);
        hts_close(alt_hdr);

        // helps move the pointer to the right place
        bcf_hdr_t *temp_h = bcf_hdr_read(fp);
        if (temp_h) bcf_hdr_destroy(temp_h);
    }

    if (alt_hdr_fn.m) free(alt_hdr_fn.s);
    return h;
}
/*
 * Release everything held by `args`: the caller backend selected by
 * args->flag, the sample names, the family table, pending records, the
 * ploidy/sex lookup tables, the header, the output handle and the reader set.
 */
static void destroy_data(args_t *args)
{
    // Exactly one backend is torn down; the flags are tested in this order.
    if ( args->flag & CF_CCALL )
        ccall_destroy(&args->aux);
    else if ( args->flag & CF_MCALL )
        mcall_destroy(&args->aux);
    else if ( args->flag & CF_QCALL )
        qcall_destroy(&args->aux);

    // Sample name strings; the array itself is freed further down.
    int k = 0;
    while ( k < args->nsamples )
    {
        free(args->samples[k]);
        k++;
    }

    // Family table: names first, then the table.
    if ( args->aux.fams )
    {
        for (k = 0; k < args->aux.nfams; k++)
            free(args->aux.fams[k].name);
        free(args->aux.fams);
    }

    if ( args->missed_line )
        bcf_destroy(args->missed_line);
    ploidy_destroy(args->ploidy);

    // gVCF state
    if ( args->gvcf.line )
        bcf_destroy(args->gvcf.line);
    free(args->gvcf.gt);
    free(args->gvcf.dp);

    // Plain arrays
    free(args->sex2ploidy);
    free(args->sex2ploidy_prev);
    free(args->samples);
    free(args->samples_map);
    free(args->sample2sex);
    free(args->aux.ploidy);

    // Header and I/O handles
    bcf_hdr_destroy(args->aux.hdr);
    hts_close(args->out_fh);
    bcf_sr_destroy(args->aux.srs);
}
int bam_access_openhts(char *hts_file, char *ref_file){ assert(hts_file != NULL); //Assign memory for the file name etc holding struct fholder = malloc(sizeof(file_holder)); check_mem(fholder); //Beginning and end of tmp struct for bam access fholder->beg = 0; fholder->end = 0x7fffffff; // The max 32 bit integer. //Open a file for read from compressed bam. fholder->in = hts_open(hts_file, "r"); check(fholder->in != 0,"HTS file %s failed to open.",hts_file); fholder->idx = sam_index_load(fholder->in,hts_file); check(fholder->idx != 0,"HTS index file %s failed to open.",hts_file); if(ref_file){ hts_set_fai_filename(fholder->in, ref_file); }else{ if(fholder->in->format.format == cram) log_warn("No reference file provided for a cram input file, if the reference described in the cram header can't be located this script may fail."); } //Check for generic header read method. fholder->head = sam_hdr_read(fholder->in); return 0; error: if(fholder->in) hts_close(fholder->in); if(fholder) free(fholder); return -1; }
static int query_chroms(char *fname) { const char **seq; int i, nseq, ftype = file_type(fname); if ( ftype & IS_TXT || !ftype ) { tbx_t *tbx = tbx_index_load(fname); if ( !tbx ) error("Could not load .tbi index of %s\n", fname); seq = tbx_seqnames(tbx, &nseq); for (i=0; i<nseq; i++) printf("%s\n", seq[i]); free(seq); tbx_destroy(tbx); } else if ( ftype==IS_BCF ) { htsFile *fp = hts_open(fname,"r"); if ( !fp ) error("Could not read %s\n", fname); bcf_hdr_t *hdr = bcf_hdr_read(fp); if ( !hdr ) error("Could not read the header: %s\n", fname); hts_close(fp); hts_idx_t *idx = bcf_index_load(fname); if ( !idx ) error("Could not load .csi index of %s\n", fname); seq = bcf_index_seqnames(idx, hdr, &nseq); for (i=0; i<nseq; i++) printf("%s\n", seq[i]); free(seq); bcf_hdr_destroy(hdr); hts_idx_destroy(idx); } else if ( ftype==IS_BAM ) // todo: BAM error("BAM: todo\n"); return 0; }
/*
 * Finish the per-file outputs written under args->prefix and release the
 * associated bookkeeping. Nothing is done when no prefix was given.
 *
 * Each named output is closed and then indexed according to
 * args->output_type (tabix for compressed VCF, BCF index for compressed
 * BCF). A failure to close or to index an output is reported through error().
 */
static void destroy_data(args_t *args)
{
    if ( args->prefix )
    {
        fclose(args->fh_log);

        // Venn mode writes three outputs, otherwise there is one per reader.
        int i, n = args->isec_op==OP_VENN ? 3 : args->files->nreaders;
        for (i=0; i<n; i++)
        {
            if ( !args->fnames[i] ) continue;

            // The file must be complete on disk before it is indexed.
            if ( hts_close(args->fh_out[i])!=0 ) error("Could not close %s\n", args->fnames[i]);

            if ( args->output_type==FT_VCF_GZ )
            {
                tbx_conf_t conf = tbx_conf_vcf;
                // This result used to be ignored, unlike the BCF branch below.
                if ( tbx_index_build(args->fnames[i], -1, &conf) ) error("Could not index %s\n", args->fnames[i]);
            }
            else if ( args->output_type==FT_BCF_GZ )
            {
                if ( bcf_index_build(args->fnames[i],14) ) error("Could not index %s\n", args->fnames[i]);
            }
            free(args->fnames[i]);
        }
        free(args->fh_out);
        free(args->fnames);
        if ( args->fh_sites ) fclose(args->fh_sites);
        if ( args->write ) free(args->write);
    }
}
/// Release the tabix iterator, the tabix index, the file handle and the
/// line buffer, in that order; members that were never set are skipped.
hts_streamer::
~hts_streamer()
{
    if (_titr)
    {
        tbx_itr_destroy(_titr);
    }
    if (_tidx)
    {
        tbx_destroy(_tidx);
    }
    if (_hfp)
    {
        hts_close(_hfp);
    }
    // free() accepts a null pointer, so no guard is needed for the buffer
    free(_kstr.s);
}
/*
 * Copy a SAM/BAM/CRAM stream to standard output.
 *
 * The headers are written when `show_headers` is set and the records are
 * copied when `mode` is `view_all`.
 *
 * Returns 0 only when hts_hopen fails on `hfp`, and 1 once the stream has
 * been opened (the handle is closed here in that case). When the output
 * stream, the header or the record buffer cannot be obtained, nothing is
 * copied and only the cleanup is performed; previously those cases
 * dereferenced a NULL pointer.
 */
static int view_sam(hFILE *hfp, const char *filename)
{
    samFile *in = hts_hopen(hfp, filename, "r");
    if (in == NULL) return 0;

    samFile *out = dup_stdout("w");
    bam_hdr_t *hdr = sam_hdr_read(in);

    if (out != NULL && hdr != NULL) {
        if (show_headers) sam_hdr_write(out, hdr);
        if (mode == view_all) {
            bam1_t *b = bam_init1();
            if (b != NULL) {
                while (sam_read1(in, hdr, b) >= 0)
                    sam_write1(out, hdr, b);
                bam_destroy1(b);
            }
        }
    }

    if (hdr != NULL) bam_hdr_destroy(hdr);
    if (out != NULL) hts_close(out);
    hts_close(in);
    return 1;
}
/*
 * Release a sweep object: empty every record slot, free the index, record
 * and allele arrays, destroy the header, close the file and free `sw`.
 */
void bcf_sweep_destroy(bcf_sweep_t *sw)
{
    // The records live inline in sw->rec, so they are emptied rather than destroyed.
    int k = 0;
    while (k < sw->mrec)
    {
        bcf_empty1(&sw->rec[k]);
        k++;
    }

    free(sw->idx);
    free(sw->rec);
    free(sw->lals);

    bcf_hdr_destroy(sw->hdr);
    hts_close(sw->file);

    free(sw);
}
/*
 * Copy a VCF/BCF stream to standard output.
 *
 * The header is written when `show_headers` is set and the records are
 * copied when `mode` is `view_all`.
 *
 * Returns 0 only when hts_hopen fails on `hfp`, and 1 once the stream has
 * been opened (the handle is closed here in that case). When the output
 * stream, the header or the record buffer cannot be obtained, nothing is
 * copied and only the cleanup is performed; previously those cases
 * dereferenced a NULL pointer.
 */
static int view_vcf(hFILE *hfp, const char *filename)
{
    vcfFile *in = hts_hopen(hfp, filename, "r");
    if (in == NULL) return 0;

    vcfFile *out = dup_stdout("w");
    bcf_hdr_t *hdr = bcf_hdr_read(in);

    if (out != NULL && hdr != NULL) {
        if (show_headers) bcf_hdr_write(out, hdr);
        if (mode == view_all) {
            bcf1_t *rec = bcf_init();
            if (rec != NULL) {
                while (bcf_read(in, hdr, rec) >= 0)
                    bcf_write(out, hdr, rec);
                bcf_destroy(rec);
            }
        }
    }

    if (hdr != NULL) bcf_hdr_destroy(hdr);
    if (out != NULL) hts_close(out);
    hts_close(in);
    return 1;
}
// Release the resources held by a bufReader: header, iterator and index
// (each only when set), then the file name and the file handle.
void dalloc_bufReader(bufReader &ret){
  if(ret.hdr){
    bam_hdr_destroy(ret.hdr);
  }

  if(ret.itr){
    hts_itr_destroy(ret.itr);
  }

  if(ret.idx){
    hts_idx_destroy(ret.idx);
  }

  free(ret.fn);
  hts_close(ret.fp);
}
// Read the header of the SAM/BAM/CRAM file `filename`.
//
// The header text is copied into `header` and the parsed header object is
// returned. The returned object is still alive: the caller owns it and must
// release it with bam_hdr_destroy(). (It used to be destroyed before being
// returned, which handed a dangling pointer to the caller.)
//
// Exits the program with status 1 when the file cannot be opened or its
// header cannot be read.
bam_hdr_t* hts_file_header(string& filename, string& header) {
    samFile *in = hts_open(filename.c_str(), "r");
    if (in == NULL) {
        cerr << "[vg::alignment] could not open " << filename << endl;
        exit(1);
    }

    bam_hdr_t *hdr = sam_hdr_read(in);
    if (hdr == NULL) {
        cerr << "[vg::alignment] could not read header from " << filename << endl;
        hts_close(in);
        exit(1);
    }

    header = hdr->text;
    hts_close(in);
    return hdr;
}
/*
 * Load the genetic map for the chromosome of `line` into args->genmap.
 *
 * args->genmap_fname may contain the placeholder "{CHROM}", which is
 * replaced by the sequence name of `line`. The file must start with the
 * header "position COMBINED_rate(cM/Mb) Genetic_Map(cM)"; from each
 * following line the first column is stored as the position and the third
 * column as the rate. The rates are finally divided by the last rate so
 * that they are scaled to 1.
 *
 * Returns 0 on success or when no map was requested (ngenmap is set to 0
 * in that case) and -1 when the file cannot be opened. Malformed content
 * is reported through error().
 */
static int load_genmap(args_t *args, bcf1_t *line)
{
    if ( !args->genmap_fname ) { args->ngenmap = 0; return 0; }

    // The expanded file name needs its own buffer: it used to share `str`
    // with the line reader, which overwrote it before it was printed.
    kstring_t fname_buf = {0,0,0};
    kstring_t str = {0,0,0};
    const char *fname = args->genmap_fname;
    char *chrom_tag = strstr(args->genmap_fname,"{CHROM}");
    if ( chrom_tag )
    {
        kputsn(args->genmap_fname, chrom_tag - args->genmap_fname, &fname_buf);
        kputs(bcf_seqname(args->hdr,line), &fname_buf);
        kputs(chrom_tag+7,&fname_buf);     // 7 == strlen("{CHROM}")
        fname = fname_buf.s;
    }

    htsFile *fp = hts_open(fname, "rb");
    if ( !fp )
    {
        free(fname_buf.s);
        args->ngenmap = 0;
        return -1;
    }

    if ( hts_getline(fp, KS_SEP_LINE, &str) < 0 || !str.s )
        error("Could not read the header of %s\n", fname);
    if ( strcmp(str.s,"position COMBINED_rate(cM/Mb) Genetic_Map(cM)") )
        error("Unexpected header, found:\n\t[%s], but expected:\n\t[position COMBINED_rate(cM/Mb) Genetic_Map(cM)]\n", str.s);

    args->ngenmap = args->igenmap = 0;
    while ( hts_getline(fp, KS_SEP_LINE, &str) > 0 )
    {
        args->ngenmap++;
        hts_expand(genmap_t,args->ngenmap,args->mgenmap,args->genmap);
        genmap_t *gm = &args->genmap[args->ngenmap-1];

        char *tmp, *end;
        gm->pos = strtol(str.s, &tmp, 10);
        if ( str.s==tmp ) error("Could not parse %s: %s\n", fname, str.s);

        // skip second column; never step over the string terminator
        if ( !*tmp ) error("Could not parse %s: %s\n", fname, str.s);
        tmp++;
        while ( *tmp && !isspace(*tmp) ) tmp++;
        if ( !*tmp ) error("Could not parse %s: %s\n", fname, str.s);

        // read the genetic map in cM
        gm->rate = strtod(tmp+1, &end);
        if ( tmp+1==end ) error("Could not parse %s: %s\n", fname, str.s);
    }
    if ( !args->ngenmap ) error("Genetic map empty?\n");

    int i;
    for (i=0; i<args->ngenmap; i++) args->genmap[i].rate /= args->genmap[args->ngenmap-1].rate; // scale to 1

    if ( hts_close(fp) ) error("Close failed\n");
    free(str.s);
    free(fname_buf.s);
    return 0;
}
/*
 * Reads a file and outputs a new CRAM file to stdout with 'h'
 * replaced as the header.  No checks are made to the validity.
 *
 * When add_PG is non-zero a "samtools" @PG line (with arg_list as the
 * command line, when given) is added to the header and h->text / h->l_text
 * are replaced by the updated text.
 *
 * Returns 0 on success and -1 on failure, including failure to open or to
 * close the output stream.
 */
int cram_reheader(cram_fd *in, bam_hdr_t *h, const char *arg_list, int add_PG)
{
    htsFile *h_out = hts_open("-", "wc");
    if (!h_out)
        return -1;      // nothing has been acquired yet

    cram_fd *out = h_out->fp.cram;
    cram_container *c = NULL;
    int ret = -1;

    // Attempt to fill out a cram->refs[] array from @SQ headers
    cram_fd_set_header(out, sam_hdr_parse_(h->text, h->l_text));
    if (add_PG) {
        if (sam_hdr_add_PG(cram_fd_get_header(out), "samtools",
                           "VN", samtools_version(),
                           arg_list ? "CL": NULL,
                           arg_list ? arg_list : NULL,
                           NULL) != 0)
            goto err;

        // Convert back to bam_hdr_t struct
        free(h->text);
        h->text = strdup(sam_hdr_str(cram_fd_get_header(out)));
        h->l_text = sam_hdr_length(cram_fd_get_header(out));
        if (!h->text)
            goto err;
    }

    if (sam_hdr_write(h_out, h) != 0)
        goto err;
    cram_set_option(out, CRAM_OPT_REFERENCE, NULL);

    // Copy every container and its blocks verbatim
    while ((c = cram_read_container(in))) {
        int32_t i, num_blocks = cram_container_get_num_blocks(c);
        if (cram_write_container(out, c) != 0)
            goto err;

        for (i = 0; i < num_blocks; i++) {
            cram_block *blk = cram_read_block(in);
            if (!blk || cram_write_block(out, blk) != 0) {
                if (blk) cram_free_block(blk);
                goto err;
            }
            cram_free_block(blk);
        }
        cram_free_container(c);
    }

    ret = 0;

 err:
    // c is non-NULL only when the copy loop was left through an error
    if (c)
        cram_free_container(c);
    if (hts_close(h_out) != 0)
        ret = -1;

    return ret;
}
/*
 * Release the resources of a single reader: its indexes (when loaded),
 * header, file handle, iterator (when present), buffered records and the
 * sample and filter arrays. The reader structure itself is not freed.
 */
static void bcf_sr_destroy1(bcf_sr_t *reader)
{
    if ( reader->tbx_idx )
        tbx_destroy(reader->tbx_idx);
    if ( reader->bcf_idx )
        hts_idx_destroy(reader->bcf_idx);

    bcf_hdr_destroy(reader->header);
    hts_close(reader->file);

    if ( reader->itr )
        tbx_itr_destroy(reader->itr);

    // Every allocated buffer slot holds a record, not only the filled ones.
    int k = 0;
    while ( k < reader->mbuffer )
    {
        bcf_destroy1(reader->buffer[k]);
        k++;
    }

    free(reader->buffer);
    free(reader->samples);
    free(reader->filter_ids);
}
/*
 * Shut down the loaded plugin and release the remaining state in `args`:
 * the output header, the plugin search paths, the filter and the output
 * handle (each only when present).
 */
static void destroy_data(args_t *args)
{
    // The plugin's destroy hook runs before its shared object is unloaded.
    free(args->plugin.name);
    args->plugin.destroy();
    dlclose(args->plugin.handle);

    if ( args->hdr_out )
    {
        bcf_hdr_destroy(args->hdr_out);
    }

    // Only the first path entry and the array itself are freed here.
    if ( args->nplugin_paths > 0 )
    {
        free(args->plugin_paths[0]);
        free(args->plugin_paths);
    }

    if ( args->filter )
    {
        filter_destroy(args->filter);
    }

    if ( args->out_fh )
    {
        hts_close(args->out_fh);
    }
}
// Detect the format of the genotype file `filename` and dispatch to the
// matching scanner: BCF and VCF go to scanGenotypesVCF, a file detected as
// `sam` is reported and handled as BED by scanGenotypesBED. Any other
// format, or a file that cannot be opened, is reported through vrb.error.
void union_data::scanGenotypes(string filename) {
	vrb.title("Scanning genotype data in [" + filename + "]");

	// The file is opened only to query the detected format.
	htsFile * fp = hts_open(filename.c_str(),"r");
	if (!fp) {
		// Previously a NULL handle was dereferenced on the next line.
		vrb.error("Cannot open file [" + filename + "]");
		return;
	}
	enum htsExactFormat fileformat = fp->format.format;
	hts_close(fp);

	if (fileformat == bcf) {
		vrb.bullet("File format detected: BCF");
		scanGenotypesVCF(filename);
	} else if (fileformat == vcf) {
		vrb.bullet("File format detected: VCF");
		scanGenotypesVCF(filename);
	} else if (fileformat == sam) {
		vrb.bullet("File format detected: BED");
		scanGenotypesBED(filename);
	} else vrb.error("File format not supported!");
}
/*
 * Release an annotation file object: close the file, destroy its tabix
 * index, free the column descriptions, the buffered records and the file
 * name. The `file` structure itself is not freed. Always returns 0.
 */
int beds_file_destroy(struct beds_anno_file *file)
{
    int k;

    hts_close(file->fp);
    tbx_destroy(file->idx);

    // Column descriptors: header keys first, then the array.
    for ( k = 0; k < file->n_cols; k++ )
    {
        free(file->cols[k].hdr_key);
    }
    free(file->cols);

    // Every allocated buffer slot is destroyed.
    k = 0;
    while ( k < file->max )
    {
        beds_anno_tsv_destroy(file->buffer[k]);
        k++;
    }

    if ( file->fname )
    {
        free(file->fname);
    }
    // The buffer array is only freed when slots were allocated.
    if ( file->max )
    {
        free(file->buffer);
    }

    return 0;
}
// Scan the phenotype BED file `fbed` and record, for every phenotype that
// passes `filter_phenotype`, its id (column 4), chromosome (column 1),
// start (column 2, plus one), end (column 3), group (when grouping is
// enabled) and strand.
//
// The first line must be a header line starting with the meta character of
// the tabix index. At least five columns are required; the strand is read
// from column 6 when present, and a phenotype without that column is
// treated as being on the positive strand. Problems with the file are
// reported through vrb.error.
void cis_data::scanPhenotypes(string fbed) {
	int n_includedP = 0;
	int n_excludedP = 0;
	int n_negativeStrd = 0;

	//Open BED file
	vrb.title("Scanning phenotype data in [" + fbed + "]");
	htsFile *fp = hts_open(fbed.c_str(),"r");
	if (!fp) vrb.error("Cannot open file");
	tbx_t * tbx = tbx_index_load(fbed.c_str());
	if (!tbx) vrb.error("Cannot open index file");

	//Read header (hts_getline signals failure with a negative value)
	kstring_t str = {0,0,0};
	if (hts_getline(fp, KS_SEP_LINE, &str) < 0 || !str.l || str.s[0] != tbx->conf.meta_char ) vrb.error("Cannot read header line");

	//Scan file
	vector < string > tokens;
	while (hts_getline(fp, KS_SEP_LINE, &str) >= 0) {
		if (str.l && str.s[0] != tbx->conf.meta_char) {
			stb.split(string(str.s), tokens);
			if (tokens.size() < 5) vrb.error("Incorrect number of columns!");
			if ((grp_mode == GRP_NONE && filter_phenotype.check(tokens[3])) || (grp_mode != GRP_NONE && filter_phenotype.check(tokens[4]))) {
				phenotype_id.push_back(tokens[3]);
				phenotype_chr.push_back(tokens[0]);
				phenotype_start.push_back(atoi(tokens[1].c_str()) + 1);
				phenotype_end.push_back(atoi(tokens[2].c_str()));
				if (grp_mode > 0 && full_test) phenotype_grp.push_back("ALL_GENES");
				if (grp_mode > 0 && !full_test) phenotype_grp.push_back(tokens[4]);
				//Only five columns are guaranteed above: do not index past the end
				phenotype_neg.push_back(tokens.size() > 5 && tokens[5] == "-");
				if (phenotype_neg.back()) n_negativeStrd ++;
				n_includedP++;
			} else n_excludedP ++;
		}
	}

	//Finalize & verbose
	free(str.s);
	tbx_destroy(tbx);
	if (hts_close(fp)) vrb.error("Cannot properly close file");
	phenotype_count = phenotype_id.size();
	vrb.bullet(stb.str(n_includedP) + " phenotypes included");
	if (n_excludedP > 0) vrb.bullet(stb.str(n_excludedP) + " phenotypes excluded by user");
	if (n_negativeStrd > 0 ) vrb.bullet(stb.str(n_negativeStrd) + " phenotypes are on the negative strand");
	if (phenotype_count == 0) vrb.leave("Cannot find phenotypes in region!");
}
/*
 * Release everything owned by `args`, including `args` itself: the tag
 * lists, the filter, one output handle and base name per sample of the
 * input header, the reader set and the output header. A failing close of an
 * output handle is reported through error().
 */
static void destroy_data(args_t *args)
{
    free(args->info_tags);
    free(args->fmt_tags);
    if ( args->filter )
        filter_destroy(args->filter);

    // One output handle and name per input sample; unopened slots are skipped.
    int n_samples = bcf_hdr_nsamples(args->hdr_in);
    int k;
    for (k = 0; k < n_samples; k++)
    {
        if ( args->fh[k] )
        {
            if ( hts_close(args->fh[k]) != 0 )
                error("Error: close failed!\n");
        }
        free(args->bnames[k]);
    }
    free(args->bnames);
    free(args->fh);

    bcf_sr_destroy(args->sr);
    bcf_hdr_destroy(args->hdr_out);
    free(args);
}
// Read every record of the SAM/BAM/CRAM file `filename`, convert it to an
// Alignment (using the read-group to sample map parsed from the header
// text) and pass it to `lambda`.
//
// Returns 1 when the file was processed and 0 when it could not be opened
// or its header could not be read.
int hts_for_each(string& filename, function<void(Alignment&)> lambda) {

    samFile *in = hts_open(filename.c_str(), "r");
    if (in == NULL) return 0;

    bam_hdr_t *hdr = sam_hdr_read(in);
    if (hdr == NULL) {
        // The header text is needed below; a NULL header used to be dereferenced.
        hts_close(in);
        return 0;
    }

    map<string, string> rg_sample;
    parse_rg_sample_map(hdr->text, rg_sample);

    bam1_t *b = bam_init1();
    while (sam_read1(in, hdr, b) >= 0) {
        Alignment a = bam_to_alignment(b, rg_sample);
        lambda(a);
    }

    bam_destroy1(b);
    bam_hdr_destroy(hdr);
    hts_close(in);
    return 1;
}
/*
 * Read the file `fname` into `hdr`, one '\n'-terminated line at a time.
 * Any previous content of `hdr` is discarded. Reading stops at the first
 * line for which hts_getline does not return a positive value. Trailing
 * whitespace is then removed and a single newline is appended. Failure to
 * open or to close the file is reported through error().
 */
static void read_header_file(char *fname, kstring_t *hdr)
{
    kstring_t line = {0,0,0};
    hdr->l = 0;

    htsFile *fp = hts_open(fname, "r");
    if ( !fp )
        error("Could not read: %s\n", fname);

    for (;;)
    {
        if ( hts_getline(fp, KS_SEP_LINE, &line) <= 0 )
            break;
        kputsn(line.s, line.l, hdr);
        kputc('\n', hdr);
    }

    if ( hts_close(fp) != 0 )
        error("Close failed: %s\n", fname);
    free(line.s);

    // remove trailing newlines
    for ( ; hdr->l > 0; hdr->l--)
    {
        if ( !isspace(hdr->s[hdr->l-1]) )
            break;
    }
    kputc('\n', hdr);
}
/*
 * Write the VCF file args->fname to standard output with a modified header.
 *
 * The header lines (those starting with '#') are collected first. They are
 * replaced by the content of args->header_fname when that is given, and the
 * sample names are replaced by those listed in args->samples_fname when
 * that is given. The new header is written, followed by the remaining
 * lines of the input unchanged. A failure to open the input, or a write
 * that does not transfer the whole buffer, is reported through error().
 */
static void reheader_vcf(args_t *args)
{
    kstring_t hdr = {0,0,0};
    htsFile *fp = hts_open(args->fname, "r");
    if ( !fp ) error("Failed to open: %s\n", args->fname);

    // Collect header lines; the first non-header line stays in fp->line
    while ( hts_getline(fp, KS_SEP_LINE, &fp->line) >=0 )
    {
        kputc('\n',&fp->line);  // hts_getline eats the newline character
        if ( fp->line.s[0]!='#' ) break;
        kputsn(fp->line.s,fp->line.l,&hdr);
    }

    int nsamples = 0;
    char **samples = NULL;
    if ( args->samples_fname )
        samples = hts_readlines(args->samples_fname, &nsamples);
    if ( args->header_fname )
    {
        free(hdr.s); hdr.s = NULL; hdr.l = hdr.m = 0;
        read_header_file(args->header_fname, &hdr);
    }
    if ( samples )
    {
        set_samples(samples, nsamples, &hdr);
        int i;
        for (i=0; i<nsamples; i++) free(samples[i]);
        free(samples);
    }

    // The lengths are size_t, so they are printed with %zu (was %d)
    int out = STDOUT_FILENO;
    if ( write(out, hdr.s, hdr.l)!=hdr.l ) error("Failed to write %zu bytes\n", (size_t)hdr.l);
    free(hdr.s);

    // First record line, already read by the header loop above
    if ( fp->line.l )
    {
        if ( write(out, fp->line.s, fp->line.l)!=fp->line.l ) error("Failed to write %zu bytes\n", (size_t)fp->line.l);
    }
    while ( hts_getline(fp, KS_SEP_LINE, &fp->line) >=0 )   // uncompressed file implies small size, we don't worry about speed
    {
        kputc('\n',&fp->line);
        if ( write(out, fp->line.s, fp->line.l)!=fp->line.l ) error("Failed to write %zu bytes\n", (size_t)fp->line.l);
    }
    hts_close(fp);
}
/*
 * Release everything held by `args`: the input file names, the reader set
 * (when present), the output handle and header, the buffered records and
 * the per-sample work arrays. A failing close of the output is reported
 * through error().
 */
static void destroy_data(args_t *args)
{
    int k;

    // Input file names
    for (k = 0; k < args->nfnames; k++)
    {
        free(args->fnames[k]);
    }
    free(args->fnames);

    if ( args->files )
    {
        bcf_sr_destroy(args->files);
    }

    // Output
    if ( hts_close(args->out_fh) != 0 )
    {
        error("hts_close error\n");
    }
    bcf_hdr_destroy(args->out_hdr);

    free(args->seen_seq);
    free(args->start_pos);
    free(args->swap_phase);

    // Buffered records: every allocated slot is destroyed
    k = 0;
    while ( k < args->mbuf )
    {
        bcf_destroy(args->buf[k]);
        k++;
    }
    free(args->buf);

    // Work arrays
    free(args->GTa);
    free(args->GTb);
    free(args->nmatch);
    free(args->nmism);
    free(args->phase_qual);
    free(args->phase_set);
}
int file_type(const char *fname) { int l = strlen(fname); if (l>=7 && strcasecmp(fname+l-7, ".gff.gz") == 0) return IS_GFF; else if (l>=7 && strcasecmp(fname+l-7, ".bed.gz") == 0) return IS_BED; else if (l>=7 && strcasecmp(fname+l-7, ".sam.gz") == 0) return IS_SAM; else if (l>=7 && strcasecmp(fname+l-7, ".vcf.gz") == 0) return IS_VCF; else if (l>=4 && strcasecmp(fname+l-4, ".bcf") == 0) return IS_BCF; else if (l>=4 && strcasecmp(fname+l-4, ".bam") == 0) return IS_BAM; else if (l>=4 && strcasecmp(fname+l-5, ".cram") == 0) return IS_CRAM; htsFile *fp = hts_open(fname,"r"); enum htsExactFormat format = fp->format.format; hts_close(fp); if ( format == bcf ) return IS_BCF; if ( format == bam ) return IS_BAM; if ( format == cram ) return IS_CRAM; if ( format == vcf ) return IS_VCF; return 0; }
int main(int argc, char **argv) { if (argc != 3) { fprintf(stderr,"anno_setter <in.vcf.gz> <columns_string>\n"); return 1; } bcf_hdr_t *h = NULL; //bcf_hdr_init(); htsFile *fp = hts_open(argv[1], "r"); if (fp == NULL) error("%s : %s", argv[1], strerror(errno)); h = bcf_hdr_read(fp); if (h == NULL) error("failed to prase header"); bcf_hdr_t *out = bcf_hdr_dup(h); char *string = strdup(argv[2]); int ncols = 0; anno_col_t *cols = init_columns(string, h, out, &ncols, anno_is_vcf); print_anno_cols(cols, ncols); hts_close(fp); return 0; }
/** * Parse intervals. Processes the interval list first followed by the interval string. Duplicates are dropped. * * @intervals - intervals stored in this vector * @interval_list - file containing intervals * @interval_string - comma delimited intervals in a string * * todo: merge overlapping sites? */ void Program::parse_intervals(std::vector<GenomeInterval>& intervals, std::string interval_list, std::string interval_string) { intervals.clear(); std::map<std::string, uint32_t> m; if (interval_list!="") { htsFile *file = hts_open(interval_list.c_str(), "r"); if (file) { kstring_t *s = &file->line; while (hts_getline(file, '\n', s)>=0) { std::string ss = std::string(s->s); if (m.find(ss)==m.end()) { m[ss] = 1; GenomeInterval interval(ss); intervals.push_back(interval); } } hts_close(file); } } std::vector<std::string> v; if (interval_string!="") split(v, ",", interval_string); for (uint32_t i=0; i<v.size(); ++i) { if (m.find(v[i])==m.end()) { m[v[i]] = 1; GenomeInterval interval(v[i]); intervals.push_back(interval); } } }
int hts_for_each_parallel(string& filename, function<void(Alignment&)> lambda) { samFile *in = hts_open(filename.c_str(), "r"); if (in == NULL) return 0; bam_hdr_t *hdr = sam_hdr_read(in); map<string, string> rg_sample; parse_rg_sample_map(hdr->text, rg_sample); int thread_count = get_thread_count(); vector<bam1_t*> bs; bs.resize(thread_count); for (auto& b : bs) { b = bam_init1(); } bool more_data = true; #pragma omp parallel shared(in, hdr, more_data, rg_sample) { int tid = omp_get_thread_num(); while (more_data) { bam1_t* b = bs[tid]; #pragma omp critical (hts_input) if (more_data) { more_data = sam_read1(in, hdr, b) >= 0; } if (more_data) { Alignment a = bam_to_alignment(b, rg_sample); lambda(a); } } } for (auto& b : bs) bam_destroy1(b); bam_hdr_destroy(hdr); hts_close(in); return 1; }