Ejemplo n.º 1
0
Archivo: vcf.c Proyecto: goshng/cocoa
int bcf_hdr_set(bcf_hdr_t *hdr, const char *fname)
{
    int i, n;
    char **lines = hts_readlines(fname, &n);
    if ( !lines ) return 1;
    for (i=0; i<n-1; i++)
    {
        int k;
        bcf_hrec_t *hrec = bcf_hdr_parse_line(hdr,lines[i],&k);
        bcf_hdr_add_hrec(hdr, hrec);
        free(lines[i]);
    }
    bcf_hdr_parse_sample_line(hdr,lines[n-1]);
    free(lines[n-1]);
    free(lines);
    bcf_hdr_sync(hdr);
    bcf_hdr_fmt_text(hdr);
    return 0;
}
Ejemplo n.º 2
0
static void reheader_bcf(args_t *args, int is_compressed)
{
    htsFile *fp = hts_open(args->fname, "r"); if ( !fp ) error("Failed to open: %s\n", args->fname);
    bcf_hdr_t *hdr = bcf_hdr_read(fp); if ( !hdr ) error("Failed to read the header: %s\n", args->fname);
    kstring_t htxt = {0,0,0};
    int hlen;
    htxt.s = bcf_hdr_fmt_text(hdr, 1, &hlen);
    htxt.l = hlen;

    int i, nsamples = 0;
    char **samples = NULL;
    if ( args->samples_fname )
        samples = hts_readlines(args->samples_fname, &nsamples);
    if ( args->header_fname )
    {
        free(htxt.s); htxt.s = NULL; htxt.l = htxt.m = 0;
        read_header_file(args->header_fname, &htxt);
    }
    if ( samples )
    {
        set_samples(samples, nsamples, &htxt);
        for (i=0; i<nsamples; i++) free(samples[i]);
        free(samples);
    }

    bcf_hdr_t *hdr_out = bcf_hdr_init("r");
    bcf_hdr_parse(hdr_out, htxt.s);
    if ( args->header_fname ) hdr_out = strip_header(hdr, hdr_out);

    // write the header and the body
    htsFile *fp_out = hts_open("-",is_compressed ? "wb" : "wbu");
    bcf_hdr_write(fp_out, hdr_out);

    bcf1_t *rec = bcf_init();
    while ( bcf_read(fp, hdr, rec)==0 )
    {
        // sanity checking, this slows things down. Make it optional?
        bcf_unpack(rec, BCF_UN_ALL);
        if ( rec->rid >= hdr_out->n[BCF_DT_CTG] || strcmp(bcf_hdr_int2id(hdr,BCF_DT_CTG,rec->rid),bcf_hdr_int2id(hdr_out,BCF_DT_CTG,rec->rid)) )
            error("The CHROM is not defined: \"%s\"\n", bcf_hdr_int2id(hdr,BCF_DT_CTG,rec->rid));

        for (i=0; i<rec->d.n_flt; i++)
        {
            int id = rec->d.flt[i];
            if ( id >= hdr_out->n[BCF_DT_ID] ) break;
            if ( !bcf_hdr_idinfo_exists(hdr_out,BCF_HL_FLT,id) ) break;
            if ( strcmp(hdr->id[BCF_DT_ID][id].key,hdr_out->id[BCF_DT_ID][id].key) )
                error("FIXME: Broken FILTER ids: %s vs %s\n", hdr->id[BCF_DT_ID][id].key,hdr_out->id[BCF_DT_ID][id].key);
        }
        if ( i!=rec->d.n_flt )
            error("The FILTER is not defined: \"%s\"\n", bcf_hdr_int2id(hdr,BCF_DT_ID,rec->d.flt[i]));

        for (i=0; i<rec->n_info; i++)
        {
            int id = rec->d.info[i].key;
            if ( id >= hdr_out->n[BCF_DT_ID] ) break;
            if ( !hdr_out->id[BCF_DT_ID][id].key ) break;
            if ( !bcf_hdr_idinfo_exists(hdr_out,BCF_HL_INFO,id) ) break;
            if ( strcmp(hdr->id[BCF_DT_ID][id].key,hdr_out->id[BCF_DT_ID][id].key) )
                error("FIXME: Broken INFO ids: %s vs %s\n", hdr->id[BCF_DT_ID][id].key,hdr_out->id[BCF_DT_ID][id].key);
        }
        if ( i!=rec->n_info )
            error("The INFO tag is not defined: \"%s\"\n", bcf_hdr_int2id(hdr,BCF_DT_ID,rec->d.info[i].key));

        for (i=0; i<rec->n_fmt; i++)
        {
            int id = rec->d.fmt[i].id;
            if ( id >= hdr_out->n[BCF_DT_ID] ) break;
            if ( !hdr_out->id[BCF_DT_ID][id].key ) break;
            if ( !bcf_hdr_idinfo_exists(hdr_out,BCF_HL_FMT,id) ) break;
            if ( strcmp(hdr->id[BCF_DT_ID][id].key,hdr_out->id[BCF_DT_ID][id].key) )
                error("FIXME: Broken FORMAT ids: %s vs %s\n", hdr->id[BCF_DT_ID][id].key,hdr_out->id[BCF_DT_ID][id].key);
        }
        if ( i!=rec->n_fmt )
            error("The FORMAT tag is not defined: \"%s\"\n", bcf_hdr_int2id(hdr,BCF_DT_ID,rec->d.fmt[i].id));

        bcf_write(fp_out,hdr_out,rec);
    }
    bcf_destroy(rec);

    free(htxt.s);
    hts_close(fp_out);
    hts_close(fp);
    bcf_hdr_destroy(hdr_out);
    bcf_hdr_destroy(hdr);
}
Ejemplo n.º 3
0
Archivo: vcf.c Proyecto: goshng/cocoa
bcf_hdr_t *vcf_hdr_read(htsFile *fp)
{
	if (!fp->is_bin) {
		kstring_t txt, *s = &fp->line;
		bcf_hdr_t *h;
		h = bcf_hdr_init();
		txt.l = txt.m = 0; txt.s = 0;
		while (hts_getline(fp, KS_SEP_LINE, s) >= 0) {
			if (s->l == 0) continue;
			if (s->s[0] != '#') {
				if (hts_verbose >= 2)
					fprintf(stderr, "[E::%s] no sample line\n", __func__);
				free(txt.s);
				bcf_hdr_destroy(h);
				return 0;
			}
			if (s->s[1] != '#' && fp->fn_aux) { // insert contigs here
				int dret;
				gzFile f;
				kstream_t *ks;
				kstring_t tmp;
				tmp.l = tmp.m = 0; tmp.s = 0;
				f = gzopen(fp->fn_aux, "r");
				ks = ks_init(f);
				while (ks_getuntil(ks, 0, &tmp, &dret) >= 0) {
					int c;
					kputs("##contig=<ID=", &txt); kputs(tmp.s, &txt);
					ks_getuntil(ks, 0, &tmp, &dret);
					kputs(",length=", &txt); kputw(atol(tmp.s), &txt);
					kputsn(">\n", 2, &txt);
					if (dret != '\n')
						while ((c = ks_getc(ks)) != '\n' && c != -1); // skip the rest of the line
				}
				free(tmp.s);
				ks_destroy(ks);
				gzclose(f);
			}
			kputsn(s->s, s->l, &txt);
			if (s->s[1] != '#') break;
			kputc('\n', &txt);
		}
		h->l_text = txt.l + 1; // including NULL
		h->text = txt.s;
		bcf_hdr_parse(h);
        // check tabix index, are all contigs listed in the header? add the missing ones
        tbx_t *idx = tbx_index_load(fp->fn);
        if ( idx )
        {
			int i, n, need_sync = 0;
			const char **names = tbx_seqnames(idx, &n);
			for (i=0; i<n; i++)
			{
                bcf_hrec_t *hrec = bcf_hdr_get_hrec(h, BCF_DT_CTG, (char*) names[i]);
                if ( hrec ) continue;
                hrec = (bcf_hrec_t*) calloc(1,sizeof(bcf_hrec_t));
                hrec->key = strdup("contig");
                bcf_hrec_add_key(hrec, "ID", strlen("ID"));
                bcf_hrec_set_val(hrec, hrec->nkeys-1, (char*) names[i], strlen(names[i]), 0);
                bcf_hrec_add_key(hrec, "length", strlen("length"));
                bcf_hrec_set_val(hrec, hrec->nkeys-1, "-1", strlen("-1"), 0);   // what is a good default value?
                bcf_hdr_add_hrec(h, hrec);
                need_sync = 1;
			}
			free(names);
			tbx_destroy(idx);
            if ( need_sync )
            {
                bcf_hdr_sync(h);
                bcf_hdr_fmt_text(h);
            }
		}
		return h;
	} else return bcf_hdr_read((BGZF*)fp->fp);
}
Ejemplo n.º 4
0
void isec_vcf(args_t *args)
{
    bcf_srs_t *files = args->files;
    kstring_t str = {0,0,0};
    htsFile *out_fh = NULL;

    // When only one VCF is output, print VCF to stdout
    int out_std = 0;
    if ( args->nwrite==1 ) out_std = 1;
    if ( args->targets_fname && files->nreaders==1 ) out_std = 1;
    if ( out_std ) 
    {
        out_fh = hts_open("-",hts_bcf_wmode(args->output_type));
        bcf_hdr_append_version(files->readers[args->iwrite].header,args->argc,args->argv,"bcftools_isec");
        bcf_hdr_fmt_text(files->readers[args->iwrite].header);
        bcf_hdr_write(out_fh, files->readers[args->iwrite].header);
    }

    int n;
    while ( (n=bcf_sr_next_line(files)) )
    {
        bcf_sr_t *reader = NULL;
        bcf1_t *line = NULL;
        int i, ret = 0;
        for (i=0; i<files->nreaders; i++)
        {
            if ( !bcf_sr_has_line(files,i) ) continue;
            if ( !line ) 
            {
                line = files->readers[i].buffer[0];
                reader = &files->readers[i];
            }
            ret |= 1<<i;    // this may overflow for many files, but will be used only with two (OP_VENN)
        }

        switch (args->isec_op) 
        {
            case OP_COMPLEMENT: if ( n!=1 || !bcf_sr_has_line(files,0) ) continue; break;
            case OP_EQUAL: if ( n != args->isec_n ) continue; break;
            case OP_PLUS: if ( n < args->isec_n ) continue; break;
            case OP_MINUS: if ( n > args->isec_n ) continue;
        }

        if ( out_std )
        {
            bcf_write1(out_fh, files->readers[args->iwrite].header, files->readers[args->iwrite].buffer[0]);
            continue;
        }
        else if ( args->fh_sites )
        {
            str.l = 0;
            kputs(reader->header->id[BCF_DT_CTG][line->rid].key, &str); kputc('\t', &str);
            kputw(line->pos+1, &str); kputc('\t', &str);
            if (line->n_allele > 0) kputs(line->d.allele[0], &str);
            else kputc('.', &str);
            kputc('\t', &str);
            if (line->n_allele > 1) kputs(line->d.allele[1], &str);
            else kputc('.', &str);
            for (i=2; i<line->n_allele; i++)
            {
                kputc(',', &str);
                kputs(line->d.allele[i], &str);
            }
            kputc('\t', &str);
            for (i=0; i<files->nreaders; i++)
                kputc(bcf_sr_has_line(files,i)?'1':'0', &str);
            kputc('\n', &str);
            fwrite(str.s,sizeof(char),str.l,args->fh_sites);
        }

        if ( args->prefix )
        {
            if ( args->isec_op==OP_VENN )
                bcf_write1(args->fh_out[ret-1], reader->header, line);
            else
            {
                for (i=0; i<files->nreaders; i++)
                {
                    if ( !bcf_sr_has_line(files,i) ) continue;
                    if ( args->write && !args->write[i] ) continue;
                    bcf_write1(args->fh_out[i], files->readers[i].header, files->readers[i].buffer[0]);
                }
            }
        }
    }
    if ( str.s ) free(str.s);
    if ( out_fh ) hts_close(out_fh);
}