示例#1
0
int main_vcfview(int argc, char *argv[])
{
	int c, clevel = -1, in_type = FT_BCF, out_type = FT_VCF;
	char *fname_out = NULL, moder[8], modew[8];

	while ((c = getopt(argc, argv, "l:bvo:n:z?hu")) >= 0) {
		switch (c) {
            case 'o': 
                switch (optarg[0]) {
                    case 'b': out_type = FT_BCF_GZ; break;
                    case 'u': out_type = FT_BCF; break;
                    case 'z': out_type = FT_VCF_GZ; break;
                    case 'v': out_type = FT_VCF; break;
                    default: error("The output type \"%s\" not recognised\n", optarg);
                }
                break;
            case 'l': clevel = atoi(optarg); out_type |= FT_GZ; break;
            case 'v': in_type  = FT_VCF; break;
            case 'b': out_type = FT_BCF_GZ; break;
            case 'u': out_type = FT_BCF; break;
            case 'z': out_type = FT_VCF_GZ; break;
            case 'n': fname_out = optarg; break;
            case '?':
            case 'h': usage(); return 1; break;
        }
    }
	if (argc!=optind+1) { usage(); return 1; }

    // Init reader
	strcpy(moder, "r");
	if ( (!strcmp("-",argv[optind]) && (in_type & FT_BCF)) || (hts_file_type(argv[optind]) & FT_BCF)) strcat(moder, "b");
	htsFile *fp_in = hts_open(argv[optind], moder, NULL);
    if ( !fp_in ) error("Fail to open: %s\n", argv[optind]);
	bcf_hdr_t *hdr = vcf_hdr_read(fp_in);
    if ( !hdr ) error("Fail to read VCF/BCF header: %s\n", argv[optind]); 
	bcf1_t *rec = bcf_init1();

    // Init writer
    strcpy(modew, "w");
    if (clevel >= 0 && clevel <= 9) sprintf(modew + 1, "%d", clevel);
    if (out_type & FT_GZ) strcat(modew,"z");
    if (out_type & FT_BCF) strcat(modew, "b");
    if (out_type == FT_BCF) strcat(modew, "u"); // uncompressed BCF output
    htsFile *fp_out = hts_open(fname_out ? fname_out : "-", modew, NULL);

    vcf_hdr_write(fp_out, hdr);
    while ( vcf_read1(fp_in, hdr, rec) >= 0) vcf_write1(fp_out, hdr, rec);

	bcf_destroy1(rec);
	bcf_hdr_destroy(hdr);
	hts_close(fp_in);
    hts_close(fp_out);

	return 0;
}
示例#2
0
文件: vcfview.c 项目: mp15/htslib
int main_vcfview(int argc, char *argv[])
{
	int i, c, clevel = -1, flag = 0, n_samples = -1, *imap = 0, excl_snp = 0, excl_indel = 0;
	char *fn_ref = 0, *fn_out = 0, moder[8], **samples = 0;
	bcf_hdr_t *h, *hsub = 0;
	htsFile *in;
	bcf1_t *b;

	while ((c = getopt(argc, argv, "l:bSt:o:T:s:GNI")) >= 0) {
		switch (c) {
		case 'l': clevel = atoi(optarg); flag |= 2; break;
		case 'S': flag |= 1; break;
		case 'b': flag |= 2; break;
		case 'G': n_samples = 0; break;
		case 't': fn_ref = optarg; flag |= 1; break;
		case 'o': fn_out = optarg; break;
		case 's': samples = hts_readlines(optarg, &n_samples); break;
		case 'N': excl_snp = 1; break;
		case 'I': excl_indel = 1; break;
		}
	}
	if (argc == optind) {
		fprintf(stderr, "\nUsage:   vcfview [options] <in.bcf>|<in.vcf>|<in.vcf.gz>\n\n");
		fprintf(stderr, "Options: -b           output in BCF\n");
		fprintf(stderr, "         -S           input is VCF\n");
		fprintf(stderr, "         -o FILE      output file name [stdout]\n");
		fprintf(stderr, "         -l INT       compression level [%d]\n", clevel);
		fprintf(stderr, "         -t FILE      list of reference names and lengths [null]\n");
		fprintf(stderr, "         -s FILE/STR  list of samples (STR if started with ':'; FILE otherwise) [null]\n");
		fprintf(stderr, "         -G           drop individual genotype information\n");
		fprintf(stderr, "         -N           exclude SNPs\n");
		fprintf(stderr, "         -I           exclude INDELs\n");
		fprintf(stderr, "\n");
		return 1;
	}
	strcpy(moder, "r");
	if ((flag&1) == 0 && !(file_type(argv[optind])&(IS_VCF|IS_VCF_GZ))) strcat(moder, "b");

	in = hts_open(argv[optind], moder, fn_ref);
	h = vcf_hdr_read(in);
	if (h == 0) {
		fprintf(stderr, "[E::%s] fail to read the VCF/BCF2 header\n", __func__);
		hts_close(in);
		return 1;
	}
	if (n_samples >= 0) {
		if (n_samples) imap = (int*)malloc(n_samples * sizeof(int));
		hsub = bcf_hdr_subset(h, n_samples, samples, imap);
	}
	b = bcf_init1();

	if ((flag&4) == 0) { // VCF/BCF output
		htsFile *out;
		char modew[8];
		strcpy(modew, "w");
		if (clevel >= 0 && clevel <= 9) sprintf(modew + 1, "%d", clevel);
		if (flag&2) strcat(modew, "b");
		out = hts_open(fn_out? fn_out : "-", modew, 0);
		vcf_hdr_write(out, hsub? hsub : h);
		if (optind + 1 < argc && !(flag&1)) { // BAM input and has a region
			hts_idx_t *idx;
			if ((idx = bcf_index_load(argv[optind])) == 0) {
				fprintf(stderr, "[E::%s] fail to load the BCF index\n", __func__);
				return 1;
			}
			for (i = optind + 1; i < argc; ++i) {
				hts_itr_t *iter;
				if ((iter = bcf_itr_querys(idx, h, argv[i])) == 0) {
					fprintf(stderr, "[E::%s] fail to parse region '%s'\n", __func__, argv[i]);
					continue;
				}
				while (bcf_itr_next((BGZF*)in->fp, iter, b) >= 0) {
					if (excl_snp && bcf_is_snp(b)) continue;
					if (excl_indel && !bcf_is_snp(b)) continue;
					if (n_samples >= 0) {
						bcf_subset(h, b, n_samples, imap);
						vcf_write1(out, hsub, b);
					} else vcf_write1(out, h, b);
				}
				hts_itr_destroy(iter);
			}
			hts_idx_destroy(idx);
		} else {
			while (vcf_read1(in, h, b) >= 0) {
				if (excl_snp && bcf_is_snp(b)) continue;
				if (excl_indel && !bcf_is_snp(b)) continue;
				if (n_samples >= 0) {
					bcf_subset(h, b, n_samples, imap);
					vcf_write1(out, hsub, b);
				} else vcf_write1(out, h, b);
			}
		}
		hts_close(out);
	}

	bcf_destroy1(b);
	if (n_samples > 0) {
		for (i = 0; i < n_samples; ++i) free(samples[i]);
		free(samples);
		bcf_hdr_destroy(hsub);
		free(imap);
	}
	bcf_hdr_destroy(h);
	hts_close(in);
	return 0;
}
示例#3
0
int bcf_sr_add_reader(readers_t *files, const char *fname)
{
    files->readers = (reader_t*) realloc(files->readers, sizeof(reader_t)*(files->nreaders+1));
    reader_t *reader = &files->readers[files->nreaders++];
    memset(reader,0,sizeof(reader_t));

    int type = file_type(fname);
    if ( type==IS_VCF_GZ ) 
    {
        reader->tbx = tbx_index_load(fname);
        if ( !reader->tbx )
        {
            fprintf(stderr,"[add_reader] Could not load the index of %s\n", fname);
            return 0;
        }

        // This is just to read the header
        htsFile *file = hts_open(fname, "r", NULL);
        if ( !file ) return 0;
        reader->header = vcf_hdr_read(file);
        hts_close(file);

        // The VCF opened in binary tabix mode
        reader->file = hts_open(fname, "rb", NULL);
        if ( !reader->file ) return 0;
    }
    else if ( type==IS_BCF ) 
    {
        reader->file = hts_open(fname, "rb", NULL);
        if ( !reader->file ) return 0;
        reader->header = vcf_hdr_read(reader->file);

        reader->bcf = bcf_index_load(fname);
        if ( !reader->bcf ) 
        {
            fprintf(stderr,"[add_reader] Could not load the index of %s\n", fname);
            return 0;   // not indexed..?
        }
    }
    else
    {
        fprintf(stderr,"Expected .vcf.gz or .bcf file\n");
        return 0;
    }

    reader->fname   = fname;
    reader->filter_id = -1;
    if ( files->apply_filters )
        reader->filter_id = bcf_id2int(reader->header, BCF_DT_ID, "PASS");

    // Update list of chromosomes
    if ( files->region )
    {
        if ( !files->seqs )
        {
            files->mseqs = files->nseqs = 1;
            files->seqs = (const char**) malloc(sizeof(const char*));
            files->seqs[0] = files->region;
        }
    }
    else
    {
        int n,i,j;
        const char **names = bcf_seqnames(reader->header, &n);
        for (i=0; i<n; i++)
        {
            for (j=0; j<files->nseqs; j++)
                if ( !strcmp(names[i],files->seqs[j]) ) break;
            if ( j<files->nseqs ) continue;     // already have this chr
            files->mseqs += 30;
            files->seqs = (const char**) realloc(files->seqs, sizeof(const char*)*files->mseqs);
            files->seqs[files->nseqs++] = names[i];
        }
        free(names);
    }
    files->iseq = -1;
    return 1;
}