コード例 #1
0
ファイル: tabix.c プロジェクト: Illumina/akt
static int query_chroms(char *fname)
{
    const char **seq;
    int i, nseq, ftype = file_type(fname);
    if ( ftype & IS_TXT || !ftype )
    {
        tbx_t *tbx = tbx_index_load(fname);
        if ( !tbx ) error("Could not load .tbi index of %s\n", fname);
        seq = tbx_seqnames(tbx, &nseq);
        for (i=0; i<nseq; i++)
            printf("%s\n", seq[i]);
        free(seq);
        tbx_destroy(tbx);
    }
    else if ( ftype==IS_BCF )
    {
        htsFile *fp = hts_open(fname,"r");
        if ( !fp ) error("Could not read %s\n", fname);
        bcf_hdr_t *hdr = bcf_hdr_read(fp);
        if ( !hdr ) error("Could not read the header: %s\n", fname);
        hts_close(fp);
        hts_idx_t *idx = bcf_index_load(fname);
        if ( !idx ) error("Could not load .csi index of %s\n", fname);
        seq = bcf_index_seqnames(idx, hdr, &nseq);
        for (i=0; i<nseq; i++)
            printf("%s\n", seq[i]);
        free(seq);
        bcf_hdr_destroy(hdr);
        hts_idx_destroy(idx);
    }
    else if ( ftype==IS_BAM )   // todo: BAM
        error("BAM: todo\n");
    return 0;
}
コード例 #2
0
/**
 * Load index for the ith file, returns true if successful
 */
bool BCFSyncedStreamReader::load_index(int32_t i)
{
    if (ftypes[i]==FT_BCF_GZ)
    {
        if (!(idxs[i] = bcf_index_load(vcf_files[i].c_str())))
        {
            return false;
        }
    }
    else if (ftypes[i]==FT_VCF_GZ)
    {
        if (!(tbxs[i] = tbx_index_load(vcf_files[i].c_str())))
        {
            return false;
        }
    }

    return true;
}
コード例 #3
0
ファイル: bcf_synced_reader.cpp プロジェクト: holtgrewe/vt
/**
 * Load index for the ith file, returns true if successful
 */
bool BCFSyncedReader::load_index(int32_t i)
{
    if (ftypes[i].format==bcf && ftypes[i].compression==bgzf)
    {
        if (!(idxs[i] = bcf_index_load(file_names[i].c_str())))
        {
            return false;
        }
    }
    else if (ftypes[i].format==vcf && ftypes[i].compression==bgzf)
    {
        if (!(tbxs[i] = tbx_index_load(file_names[i].c_str())))
        {
            return false;
        }
    }

    return true;
}
コード例 #4
0
ファイル: vcfindex.c プロジェクト: Bratdaking/pysam
int vcf_index_stats(char *fname, int stats)
{
    char *fn_out = NULL;
    FILE *out;
    out = fn_out ? fopen(fn_out, "w") : stdout;

    const char **seq;
    int i, nseq;
    tbx_t *tbx = NULL;
    hts_idx_t *idx = NULL;

    htsFile *fp = hts_open(fname,"r");
    if ( !fp ) { fprintf(stderr,"Could not read %s\n", fname); return 1; }
    bcf_hdr_t *hdr = bcf_hdr_read(fp);
    if ( !hdr ) { fprintf(stderr,"Could not read the header: %s\n", fname); return 1; }

    if ( hts_get_format(fp)->format==vcf )
    {
        tbx = tbx_index_load(fname);
        if ( !tbx ) { fprintf(stderr,"Could not load TBI index: %s\n", fname); return 1; }
    }
    else if ( hts_get_format(fp)->format==bcf )
    {
        idx = bcf_index_load(fname);
        if ( !idx ) { fprintf(stderr,"Could not load CSI index: %s\n", fname); return 1; }
    }
    else
    {
        fprintf(stderr,"Could not detect the file type as VCF or BCF: %s\n", fname);
        return 1;
    }

    seq = tbx ? tbx_seqnames(tbx, &nseq) : bcf_index_seqnames(idx, hdr, &nseq);
    uint64_t sum = 0;
    for (i=0; i<nseq; i++)
    {
        uint64_t records, v;
        hts_idx_get_stat(tbx ? tbx->idx : idx, i, &records, &v);
        sum+=records;
        if (stats&2 || !records) continue;
        bcf_hrec_t *hrec = bcf_hdr_get_hrec(hdr, BCF_HL_CTG, "ID", seq[i], NULL);
        int hkey = hrec ? bcf_hrec_find_key(hrec, "length") : -1;
        fprintf(out,"%s\t%s\t%" PRIu64 "\n", seq[i], hkey<0?".":hrec->vals[hkey], records);
    }
    if (!sum)
    {
        // No counts found.
        // Is this because index version has no stored count data, or no records?
        bcf1_t *rec = bcf_init1();
        if (bcf_read1(fp, hdr, rec) >= 0)
        {
            fprintf(stderr,"%s index of %s does not contain any count metadata. Please re-index with a newer version of bcftools or tabix.\n", tbx ? "TBI" : "CSI", fname);
            return 1;
        }
        bcf_destroy1(rec);
    }
    if (stats&2) fprintf(out, "%" PRIu64 "\n", sum);
    free(seq);
    fclose(out);
    hts_close(fp);
    bcf_hdr_destroy(hdr);
    if (tbx)
        tbx_destroy(tbx);
    if (idx)
        hts_idx_destroy(idx);
    return 0;
}
コード例 #5
0
ファイル: vcfhdr2json.cpp プロジェクト: Illumina/hap.py
int main(int argc, char* argv[]) {
    namespace po = boost::program_options;

    std::string file;
    std::string output;

    try
    {
        // Declare the supported options.
        po::options_description desc("Allowed options");
        desc.add_options()
            ("help,h", "produce help message")
            ("version", "Show version")            
            ("input-file", po::value< std::string >(), "The input files")
            ("output-file", po::value<std::string>(), "The output file name.")
        ;

        po::positional_options_description popts;
        popts.add("input-file", 1);
        popts.add("output-file", 1);

        po::options_description cmdline_options;
        cmdline_options
            .add(desc)
        ;

        po::variables_map vm;
        
        po::store(po::command_line_parser(argc, argv).
                  options(cmdline_options).positional(popts).run(), vm);
        po::notify(vm); 

        if (vm.count("version")) 
        {
            std::cout << "vcfhdr2json version " << HAPLOTYPES_VERSION << "\n";
            return 0;
        }

        if (vm.count("help")) 
        {
            std::cout << desc << "\n";
            return 1;
        }

        if (vm.count("input-file"))
        {
            file = vm["input-file"].as< std::string > ();
        }

        if (vm.count("output-file"))
        {
            output = vm["output-file"].as< std::string >();
        }

        if(file.size() == 0)
        {
            std::cerr << "Please specify an input file.\n";
            return 1;
        }

        if (output == "")
        {
            std::cerr << "Please specify an output file.\n";
            return 1; 
        }
    } 
    catch (po::error & e)
    {
        std::cerr << e.what() << "\n";
        return 1;
    }

    try
    {
        Json::StyledWriter writer;
        htsFile * fp = bcf_open(file.c_str(), "r");
        bcf_hdr_t * hdr = bcf_hdr_read(fp);

        Json::Value root;
        Json::Value a;
        for (int i = 0; i < bcf_hdr_nsamples(hdr); ++i)
        {
            a.append(hdr->samples[i]);
        }
        root["samples"] = a;

        Json::Value fields;
        for (int i = 0; i < hdr->nhrec; i++)
        {
            Json::Value field;
            field["key"] = hdr->hrec[i]->key;
            if (!hdr->hrec[i]->value)
            {
                Json::Value values;

                for (int j = 0; j < hdr->hrec[i]->nkeys; j++)
                {
                    values[hdr->hrec[i]->keys[j]] = hdr->hrec[i]->vals[j];
                }
                field["values"] = values;
            }
            else
            {
                field["value"] = hdr->hrec[i]->value;
            }
            fields.append(field);
        }
        root["fields"] = fields;

        tbx_t * tbx_idx = tbx_index_load(file.c_str());
        if ( !tbx_idx )
        {
            hts_idx_t * csi_idx = bcf_index_load(file.c_str());
            if(!csi_idx)
            {
                root["tabix"] = Json::Value::null;
            }
            else
            {
                root["tabix"] = Json::Value();
                root["tabix"]["chromosomes"] = Json::Value();

                int count = 0;
                const char ** tbx_names = bcf_index_seqnames(csi_idx, hdr, &count);

                for (int i = 0; i < count; ++i)
                {
                    root["tabix"]["chromosomes"].append(tbx_names[i]);
                }
                free(tbx_names);
                hts_idx_destroy(csi_idx);
            }
        }
        else
        {
            root["tabix"] = Json::Value();
            root["tabix"]["chromosomes"] = Json::Value();

            int count = 0;
            const char ** tbx_names = tbx_seqnames(tbx_idx, &count);

            for (int i = 0; i < count; ++i)
            {
                root["tabix"]["chromosomes"].append(tbx_names[i]);
            }

            free(tbx_names);
            tbx_destroy(tbx_idx);
        }


        std::ofstream out(output.c_str());
        out << writer.write(root);

        bcf_close(fp);
        bcf_hdr_destroy(hdr);
    } 
    catch(std::runtime_error & e)
    {
        std::cerr << e.what() << std::endl;
        return 1;
    }
    catch(std::logic_error & e)
    {
        std::cerr << e.what() << std::endl;
        return 1;
    }

    return 0;
}
コード例 #6
0
ファイル: vcfview.c プロジェクト: mp15/htslib
int main_vcfview(int argc, char *argv[])
{
	int i, c, clevel = -1, flag = 0, n_samples = -1, *imap = 0, excl_snp = 0, excl_indel = 0;
	char *fn_ref = 0, *fn_out = 0, moder[8], **samples = 0;
	bcf_hdr_t *h, *hsub = 0;
	htsFile *in;
	bcf1_t *b;

	while ((c = getopt(argc, argv, "l:bSt:o:T:s:GNI")) >= 0) {
		switch (c) {
		case 'l': clevel = atoi(optarg); flag |= 2; break;
		case 'S': flag |= 1; break;
		case 'b': flag |= 2; break;
		case 'G': n_samples = 0; break;
		case 't': fn_ref = optarg; flag |= 1; break;
		case 'o': fn_out = optarg; break;
		case 's': samples = hts_readlines(optarg, &n_samples); break;
		case 'N': excl_snp = 1; break;
		case 'I': excl_indel = 1; break;
		}
	}
	if (argc == optind) {
		fprintf(stderr, "\nUsage:   vcfview [options] <in.bcf>|<in.vcf>|<in.vcf.gz>\n\n");
		fprintf(stderr, "Options: -b           output in BCF\n");
		fprintf(stderr, "         -S           input is VCF\n");
		fprintf(stderr, "         -o FILE      output file name [stdout]\n");
		fprintf(stderr, "         -l INT       compression level [%d]\n", clevel);
		fprintf(stderr, "         -t FILE      list of reference names and lengths [null]\n");
		fprintf(stderr, "         -s FILE/STR  list of samples (STR if started with ':'; FILE otherwise) [null]\n");
		fprintf(stderr, "         -G           drop individual genotype information\n");
		fprintf(stderr, "         -N           exclude SNPs\n");
		fprintf(stderr, "         -I           exclude INDELs\n");
		fprintf(stderr, "\n");
		return 1;
	}
	strcpy(moder, "r");
	if ((flag&1) == 0 && !(file_type(argv[optind])&(IS_VCF|IS_VCF_GZ))) strcat(moder, "b");

	in = hts_open(argv[optind], moder, fn_ref);
	h = vcf_hdr_read(in);
	if (h == 0) {
		fprintf(stderr, "[E::%s] fail to read the VCF/BCF2 header\n", __func__);
		hts_close(in);
		return 1;
	}
	if (n_samples >= 0) {
		if (n_samples) imap = (int*)malloc(n_samples * sizeof(int));
		hsub = bcf_hdr_subset(h, n_samples, samples, imap);
	}
	b = bcf_init1();

	if ((flag&4) == 0) { // VCF/BCF output
		htsFile *out;
		char modew[8];
		strcpy(modew, "w");
		if (clevel >= 0 && clevel <= 9) sprintf(modew + 1, "%d", clevel);
		if (flag&2) strcat(modew, "b");
		out = hts_open(fn_out? fn_out : "-", modew, 0);
		vcf_hdr_write(out, hsub? hsub : h);
		if (optind + 1 < argc && !(flag&1)) { // BAM input and has a region
			hts_idx_t *idx;
			if ((idx = bcf_index_load(argv[optind])) == 0) {
				fprintf(stderr, "[E::%s] fail to load the BCF index\n", __func__);
				return 1;
			}
			for (i = optind + 1; i < argc; ++i) {
				hts_itr_t *iter;
				if ((iter = bcf_itr_querys(idx, h, argv[i])) == 0) {
					fprintf(stderr, "[E::%s] fail to parse region '%s'\n", __func__, argv[i]);
					continue;
				}
				while (bcf_itr_next((BGZF*)in->fp, iter, b) >= 0) {
					if (excl_snp && bcf_is_snp(b)) continue;
					if (excl_indel && !bcf_is_snp(b)) continue;
					if (n_samples >= 0) {
						bcf_subset(h, b, n_samples, imap);
						vcf_write1(out, hsub, b);
					} else vcf_write1(out, h, b);
				}
				hts_itr_destroy(iter);
			}
			hts_idx_destroy(idx);
		} else {
			while (vcf_read1(in, h, b) >= 0) {
				if (excl_snp && bcf_is_snp(b)) continue;
				if (excl_indel && !bcf_is_snp(b)) continue;
				if (n_samples >= 0) {
					bcf_subset(h, b, n_samples, imap);
					vcf_write1(out, hsub, b);
				} else vcf_write1(out, h, b);
			}
		}
		hts_close(out);
	}

	bcf_destroy1(b);
	if (n_samples > 0) {
		for (i = 0; i < n_samples; ++i) free(samples[i]);
		free(samples);
		bcf_hdr_destroy(hsub);
		free(imap);
	}
	bcf_hdr_destroy(h);
	hts_close(in);
	return 0;
}
コード例 #7
0
ファイル: synced_bcf_reader.c プロジェクト: goshng/cocoa
int bcf_sr_add_reader(readers_t *files, const char *fname)
{
    files->readers = (reader_t*) realloc(files->readers, sizeof(reader_t)*(files->nreaders+1));
    reader_t *reader = &files->readers[files->nreaders++];
    memset(reader,0,sizeof(reader_t));

    int type = file_type(fname);
    if ( type==IS_VCF_GZ ) 
    {
        reader->tbx = tbx_index_load(fname);
        if ( !reader->tbx )
        {
            fprintf(stderr,"[add_reader] Could not load the index of %s\n", fname);
            return 0;
        }

        // This is just to read the header
        htsFile *file = hts_open(fname, "r", NULL);
        if ( !file ) return 0;
        reader->header = vcf_hdr_read(file);
        hts_close(file);

        // The VCF opened in binary tabix mode
        reader->file = hts_open(fname, "rb", NULL);
        if ( !reader->file ) return 0;
    }
    else if ( type==IS_BCF ) 
    {
        reader->file = hts_open(fname, "rb", NULL);
        if ( !reader->file ) return 0;
        reader->header = vcf_hdr_read(reader->file);

        reader->bcf = bcf_index_load(fname);
        if ( !reader->bcf ) 
        {
            fprintf(stderr,"[add_reader] Could not load the index of %s\n", fname);
            return 0;   // not indexed..?
        }
    }
    else
    {
        fprintf(stderr,"Expected .vcf.gz or .bcf file\n");
        return 0;
    }

    reader->fname   = fname;
    reader->filter_id = -1;
    if ( files->apply_filters )
        reader->filter_id = bcf_id2int(reader->header, BCF_DT_ID, "PASS");

    // Update list of chromosomes
    if ( files->region )
    {
        if ( !files->seqs )
        {
            files->mseqs = files->nseqs = 1;
            files->seqs = (const char**) malloc(sizeof(const char*));
            files->seqs[0] = files->region;
        }
    }
    else
    {
        int n,i,j;
        const char **names = bcf_seqnames(reader->header, &n);
        for (i=0; i<n; i++)
        {
            for (j=0; j<files->nseqs; j++)
                if ( !strcmp(names[i],files->seqs[j]) ) break;
            if ( j<files->nseqs ) continue;     // already have this chr
            files->mseqs += 30;
            files->seqs = (const char**) realloc(files->seqs, sizeof(const char*)*files->mseqs);
            files->seqs[files->nseqs++] = names[i];
        }
        free(names);
    }
    files->iseq = -1;
    return 1;
}
コード例 #8
0
ファイル: tabix.c プロジェクト: Illumina/akt
static int query_regions(args_t *args, char *fname, char **regs, int nregs)
{
    int i;
    htsFile *fp = hts_open(fname,"r");
    if ( !fp ) error("Could not read %s\n", fname);
    enum htsExactFormat format = hts_get_format(fp)->format;

    regidx_t *reg_idx = NULL;
    if ( args->targets_fname )
    {
        reg_idx = regidx_init(args->targets_fname, NULL, NULL, 0, NULL);
        if ( !reg_idx ) error("Could not read %s\n", args->targets_fname);
    }

    if ( format == bcf )
    {
        htsFile *out = hts_open("-","w");
        if ( !out ) error("Could not open stdout\n", fname);
        hts_idx_t *idx = bcf_index_load(fname);
        if ( !idx ) error("Could not load .csi index of %s\n", fname);
        bcf_hdr_t *hdr = bcf_hdr_read(fp);
        if ( !hdr ) error("Could not read the header: %s\n", fname);
        if ( args->print_header )
            bcf_hdr_write(out,hdr);
        if ( !args->header_only )
        {
            bcf1_t *rec = bcf_init();
            for (i=0; i<nregs; i++)
            {
                hts_itr_t *itr = bcf_itr_querys(idx,hdr,regs[i]);
                while ( bcf_itr_next(fp, itr, rec) >=0 )
                {
                    if ( reg_idx && !regidx_overlap(reg_idx, bcf_seqname(hdr,rec),rec->pos,rec->pos+rec->rlen-1, NULL) ) continue;
                    bcf_write(out,hdr,rec);
                }
                tbx_itr_destroy(itr);
            }
            bcf_destroy(rec);
        }
        if ( hts_close(out) ) error("hts_close returned non-zero status for stdout\n");
        bcf_hdr_destroy(hdr);
        hts_idx_destroy(idx);
    }
    else if ( format==vcf || format==sam || format==unknown_format )
    {
        tbx_t *tbx = tbx_index_load(fname);
        if ( !tbx ) error("Could not load .tbi/.csi index of %s\n", fname);
        kstring_t str = {0,0,0};
        if ( args->print_header )
        {
            while ( hts_getline(fp, KS_SEP_LINE, &str) >= 0 )
            {
                if ( !str.l || str.s[0]!=tbx->conf.meta_char ) break;
                puts(str.s);
            }
        }
        if ( !args->header_only )
        {
            int nseq;
            const char **seq = NULL;
            if ( reg_idx ) seq = tbx_seqnames(tbx, &nseq);
            for (i=0; i<nregs; i++)
            {
                hts_itr_t *itr = tbx_itr_querys(tbx, regs[i]);
                if ( !itr ) continue;
                while (tbx_itr_next(fp, tbx, itr, &str) >= 0)
                {
                    if ( reg_idx && !regidx_overlap(reg_idx,seq[itr->curr_tid],itr->curr_beg,itr->curr_end, NULL) ) continue;
                    puts(str.s);
                }
                tbx_itr_destroy(itr);
            }
            free(seq);
        }
        free(str.s);
        tbx_destroy(tbx);
    }
    else if ( format==bam )
        error("Please use \"samtools view\" for querying BAM files.\n");

    if ( reg_idx ) regidx_destroy(reg_idx);
    if ( hts_close(fp) ) error("hts_close returned non-zero status: %s\n", fname);

    for (i=0; i<nregs; i++) free(regs[i]);
    free(regs);
    return 0;
}
コード例 #9
0
int bcf_sr_add_reader(bcf_srs_t *files, const char *fname)
{
    htsFile* file_ptr = hts_open(fname, "r");
    if ( ! file_ptr ) {
        files->errnum = open_failed;
        return 0;
    }

    files->has_line = (int*) realloc(files->has_line, sizeof(int)*(files->nreaders+1));
    files->has_line[files->nreaders] = 0;
    files->readers  = (bcf_sr_t*) realloc(files->readers, sizeof(bcf_sr_t)*(files->nreaders+1));
    bcf_sr_t *reader = &files->readers[files->nreaders++];
    memset(reader,0,sizeof(bcf_sr_t));

    reader->file = file_ptr;

    files->errnum = 0;

    if ( files->require_index )
    {
        if ( reader->file->format.format==vcf )
        {
            if ( reader->file->format.compression!=bgzf )
            {
                files->errnum = not_bgzf;
                return 0;
            }

            reader->tbx_idx = tbx_index_load(fname);
            if ( !reader->tbx_idx )
            {
                files->errnum = idx_load_failed;
                return 0;
            }

            reader->header = bcf_hdr_read(reader->file);
        }
        else if ( reader->file->format.format==bcf )
        {
            if ( reader->file->format.compression!=bgzf )
            {
                files->errnum = not_bgzf;
                return 0;
            }

            reader->header = bcf_hdr_read(reader->file);

            reader->bcf_idx = bcf_index_load(fname);
            if ( !reader->bcf_idx )
            {
                files->errnum = idx_load_failed;
                return 0;
            }
        }
        else
        {
            files->errnum = file_type_error;
            return 0;
        }
    }
    else
    {
        if ( reader->file->format.format==bcf || reader->file->format.format==vcf )
        {
            reader->header = bcf_hdr_read(reader->file);
        }
        else
        {
            files->errnum = file_type_error;
            return 0;
        }
        files->streaming = 1;
    }
    if ( files->streaming && files->nreaders>1 )
    {
        files->errnum = api_usage_error;
        fprintf(stderr,"[%s:%d %s] Error: %d readers, yet require_index not set\n", __FILE__,__LINE__,__FUNCTION__,files->nreaders);
        return 0;
    }
    if ( files->streaming && files->regions )
    {
        files->errnum = api_usage_error;
        fprintf(stderr,"[%s:%d %s] Error: cannot tabix-jump in streaming mode\n", __FILE__,__LINE__,__FUNCTION__);
        return 0;
    }
    if ( !reader->header )
    {
        files->errnum = header_error;
        return 0;
    }

    reader->fname = fname;
    if ( files->apply_filters )
        reader->filter_ids = init_filters(reader->header, files->apply_filters, &reader->nfilter_ids);

    // Update list of chromosomes
    if ( !files->explicit_regs && !files->streaming )
    {
        int n,i;
        const char **names = reader->tbx_idx ? tbx_seqnames(reader->tbx_idx, &n) : bcf_hdr_seqnames(reader->header, &n);
        for (i=0; i<n; i++)
        {
            if ( !files->regions )
                files->regions = _regions_init_string(names[i]);
            else
                _regions_add(files->regions, names[i], -1, -1);
        }
        free(names);
    }

    return 1;
}
コード例 #10
0
ファイル: synced_bcf_reader.c プロジェクト: NVlabs/nvbio
int bcf_sr_add_reader(bcf_srs_t *files, const char *fname)
{
    files->has_line = (int*) realloc(files->has_line, sizeof(int)*(files->nreaders+1));
    files->has_line[files->nreaders] = 0;
    files->readers  = (bcf_sr_t*) realloc(files->readers, sizeof(bcf_sr_t)*(files->nreaders+1));
    bcf_sr_t *reader = &files->readers[files->nreaders++];
    memset(reader,0,sizeof(bcf_sr_t));

    reader->file = hts_open(fname, "r");
    if ( !reader->file ) return 0;

    reader->type = reader->file->is_bin? FT_BCF : FT_VCF;
    if (reader->file->is_compressed) reader->type |= FT_GZ;

    if ( files->require_index )
    {
        if ( reader->type==FT_VCF_GZ ) 
        {
            reader->tbx_idx = tbx_index_load(fname);
            if ( !reader->tbx_idx )
            {
                fprintf(stderr,"[add_reader] Could not load the index of %s\n", fname);
                return 0;
            }

            reader->header = bcf_hdr_read(reader->file);
        }
        else if ( reader->type==FT_BCF_GZ ) 
        {
            reader->header = bcf_hdr_read(reader->file);

            reader->bcf_idx = bcf_index_load(fname);
            if ( !reader->bcf_idx ) 
            {
                fprintf(stderr,"[add_reader] Could not load the index of %s\n", fname);
                return 0;   // not indexed..?
            }
        }
        else
        {
            fprintf(stderr,"Index required, expected .vcf.gz or .bcf file: %s\n", fname);
            return 0;
        }
    }
    else 
    {
        if ( reader->type & FT_BCF )
        {
            reader->header = bcf_hdr_read(reader->file);
        }
        else if ( reader->type & FT_VCF )
        {
            reader->header = bcf_hdr_read(reader->file);
        }
        else
        {
            fprintf(stderr,"File type not recognised: %s\n", fname);
            return 0;
        }
        files->streaming = 1;
    }
    if ( files->streaming && files->nreaders>1 )
    {
        fprintf(stderr,"[%s:%d %s] Error: %d readers, yet require_index not set\n", __FILE__,__LINE__,__FUNCTION__,files->nreaders);
        return 0;
    }
    if ( files->streaming && files->regions )
    {
        fprintf(stderr,"[%s:%d %s] Error: cannot tabix-jump in streaming mode\n", __FILE__,__LINE__,__FUNCTION__);
        return 0;
    }
    if ( !reader->header ) return 0;

    reader->fname = fname;
    if ( files->apply_filters )
        reader->filter_ids = init_filters(reader->header, files->apply_filters, &reader->nfilter_ids);

    // Update list of chromosomes
    if ( !files->explicit_regs && !files->streaming )
    {
        int n,i;
        const char **names = reader->tbx_idx ? tbx_seqnames(reader->tbx_idx, &n) : bcf_hdr_seqnames(reader->header, &n);
        for (i=0; i<n; i++)
        {
            if ( !files->regions )
                files->regions = _regions_init_string(names[i]);
            else
                _regions_add(files->regions, names[i], -1, -1);
        }
        free(names);
    }

    return 1;
}
コード例 #11
0
ファイル: bcf_ordered_reader.cpp プロジェクト: kchennen/vt
BCFOrderedReader::BCFOrderedReader(std::string file_name, std::vector<GenomeInterval>& intervals)
{
    this->file_name = (file_name=="+")? "-" : file_name;
    file = NULL;
    hdr = NULL;
    idx = NULL;
    tbx = NULL;
    itr = NULL;

    this->intervals = intervals;
    interval_index = 0;
    index_loaded = false;

    file = hts_open(this->file_name.c_str(), "r");
    if (!file)
    {
        fprintf(stderr, "[%s:%d %s] Cannot open %s\n", __FILE__, __LINE__, __FUNCTION__, file_name.c_str());
        exit(1);    
    }    
    ftype = file->format;

    if (ftype.format!=vcf && ftype.format!=bcf)
    {
        fprintf(stderr, "[%s:%d %s] Not a VCF/BCF file: %s\n", __FILE__, __LINE__, __FUNCTION__, file_name.c_str());
        exit(1);
    }

    s = {0, 0, 0};
    if (file==NULL) exit(1);
    hdr = bcf_alt_hdr_read(file);
    if (!hdr) exit(1);

    intervals_present =  intervals.size()!=0;

    if (ftype.format==bcf)
    {   
        if ((idx = bcf_index_load(file_name.c_str())))
        {
            index_loaded = true;
        }
        else
        {    
            if (intervals_present)
            {
                fprintf(stderr, "[E:%s] index cannot be loaded for %s for random access, ignoring specified intervals and reading from start.\n", __FUNCTION__, file_name.c_str());
//                exit(1);
            }
        }
    }
    else if (ftype.format==vcf)
    {
        if (ftype.compression==bgzf)
        {    
            if ((tbx = tbx_index_load(file_name.c_str())))
            {
                index_loaded = true;
            }
            else
            {
                if (intervals_present)
                {
                    fprintf(stderr, "[E:%s] index cannot be loaded for %s for random access, ignoring specified intervals and reading from start.\n", __FUNCTION__, file_name.c_str());
//                    exit(1);
                }
            }
        }
        else
        {
            if (intervals_present)
            {
                fprintf(stderr, "[E:%s] no random access support for VCF file: %s\n", __FUNCTION__, file_name.c_str());
//                exit(1);
            }
        }
    }

    random_access_enabled = intervals_present && index_loaded;
};
コード例 #12
0
ファイル: convert.c プロジェクト: srw6v/gqt
int convert(int argc, char **argv)
{
    if (argc < 2) return convert_help();

    int c;
    char *in=NULL, *out=NULL, *bim=NULL, *vid=NULL, *tmp_dir=NULL, *ped=NULL;
    uint32_t num_fields, num_records, col = 2;
    int i_is_set = 0, 
        o_is_set = 0, 
        f_is_set = 0, 
        b_is_set = 0, 
        v_is_set = 0, 
        t_is_set = 0, 
        p_is_set = 0, 
        r_is_set = 0; 

    while((c = getopt (argc, argv, "hi:o:f:r:b:v:t:p:c:")) != -1) {
        switch (c) {
            case 'c':
                col = atoi(optarg);
                break;
            case 'p':
                p_is_set = 1;
                ped = optarg;
                break;
            case 't':
                t_is_set = 1;
                tmp_dir = optarg;
                break;
            case 'v':
                v_is_set = 1;
                vid = optarg;
                break;
            case 'b':
                b_is_set = 1;
                bim = optarg;
                break;
            case 'i':
                i_is_set = 1;
                in = optarg;
                break;
            case 'o':
                o_is_set = 1;
                out = optarg;
                break;
            case 'f':
                f_is_set = 1;
                num_fields = atoi(optarg);
                break;
            case 'r':
                r_is_set = 1;
                num_records = atoi(optarg);
                break;
            case 'h':
                convert_help();
                return 1;
            case '?':
                if ( (optopt == 'i') || 
                     (optopt == 'f') ||
                     (optopt == 'r') ||
                     (optopt == 't') ||
                     (optopt == 's') ||
                     (optopt == 'p') ||
                     (optopt == 'c') ||
                     (optopt == 'o') )
                    fprintf (stderr, "Option -%c requires an argument.\n",
                            optopt);
                else if (isprint (optopt))
                    fprintf (stderr, "Unknown option `-%c'.\n", optopt);
                else
                fprintf (stderr, "Unknown option character `\\x%x'.\n", optopt);
            default:
                convert_help();
                return 1;
        }
    }

    char *type = argv[0];

    if (i_is_set == 0) {
        printf("Input file is not set\n");
        return convert_help();
    } 

    if (strcmp(type, "bcf") == 0) {
        if ( (f_is_set == 0) || (r_is_set == 0) ) {

            fprintf(stderr,"Attempting to autodetect num of records "
                    "and fields from %s\n", in);
            //Try and auto detect the sizes, need the index
            tbx_t *tbx = NULL;
            hts_idx_t *idx = NULL;
            htsFile *fp    = hts_open(in,"rb");
            if ( !fp ) {
                fprintf(stderr,"Could not read %s\n", in);
                return 1;
            }

            bcf_hdr_t *hdr = bcf_hdr_read(fp);
            if ( !hdr ) {
                fprintf(stderr,"Could not read the header: %s\n", in);
                return 1;
            }

            if (hts_get_format(fp)->format==vcf) {
                tbx = tbx_index_load(in);
                if ( !tbx ) { 
                    fprintf(stderr,"Could not load TBI index: %s\n", in);
                    return 1;
                }
            } else if ( hts_get_format(fp)->format==bcf ) {
                idx = bcf_index_load(in);
                if ( !idx ) {
                    fprintf(stderr,"Could not load CSI index: %s\n", in);
                    return 1;
                }
            } else {
                fprintf(stderr,
                        "Could not detect the file type as VCF or BCF: %s\n",
                        in);
                return 1;
            }

            num_fields = hdr->n[BCF_DT_SAMPLE];

            num_records = 0;
            const char **seq;
            int nseq;
            seq = tbx ? tbx_seqnames(tbx, &nseq) : 
                    bcf_index_seqnames(idx, hdr, &nseq);
            int i;
            uint32_t sum = 0;
            for (i = 0; i < nseq; ++i) {
                uint64_t records, v;
                hts_idx_get_stat(tbx ? tbx->idx: idx, i, &records, &v);
                num_records += records;
            }

            fprintf(stderr, "Number of records:%u\tNumber of fields:%u\n",
                    num_records, num_fields);
            free(seq);
            hts_close(fp);
            bcf_hdr_destroy(hdr);
            if (idx)
                hts_idx_destroy(idx);
            if (tbx)
                tbx_destroy(tbx);
        }


        if (o_is_set == 0) {
            out  = (char*)malloc(strlen(in) + 5); // 5 for ext and \0
            strcpy(out,in);
            strcat(out, ".gqt");
        }
        if (b_is_set == 0) {
            bim  = (char*)malloc(strlen(in) + 5); // 5 for ext and \0
            strcpy(bim,in);
            strcat(bim, ".bim");
        }
        if (v_is_set == 0) {
            vid  = (char*)malloc(strlen(in) + 5); // 5 for ext and \0
            strcpy(vid,in);
            strcat(vid, ".vid");
        }
        if (t_is_set == 0) {
            tmp_dir  = (char*)malloc(3*sizeof(char)); // "./\0"
            strcpy(tmp_dir,"./");
        }

        int r = bcf_wahbm(in, out, bim, vid, tmp_dir, num_fields, num_records);

        return r;
    } 

    if (strcmp(type, "ped") == 0)  {
        if (o_is_set == 0) {
            if (p_is_set == 1) {
                out  = (char*)malloc(strlen(ped) + 4); // 4 for ext and \0
                strcpy(out,ped);
                strcat(out, ".db");
            } else {
                out  = (char*)malloc(strlen(in) + 4); // 4 for ext and \0
                strcpy(out,in);
                strcat(out, ".db");
            }
      }

      fprintf(stderr, "Creating sample database %s\n", out);
      return ped_ped(in, ped, col, out);
    }
    return convert_help();
}