C++ (Cpp) bgzf_dopen示例

示例#1

0

显示文件

文件： bam_reheader.c 项目： AngieHinrichs/samtabix

/*
 * Copy the BAM stream `in` to file descriptor `fd`, replacing its header
 * with `h`.
 *
 * The existing header is read from `in` (and discarded), `h` is written to
 * a new BGZF writer on `fd`, any already-decompressed bytes left in the
 * current input block are re-compressed, and the rest of the input is
 * copied through as raw bytes.
 *
 * Returns 0 on success and -1 if `in` is not opened for reading, a buffer
 * or the output writer cannot be created, the old header cannot be read,
 * or writing the output fails.
 */
int bam_reheader(BGZF *in, const bam_header_t *h, int fd)
{
	BGZF *fp;
	bam_header_t *old;
	int len;
	int status = 0;
	uint8_t *buf;
	if (in->open_mode != 'r') return -1;
	buf = malloc(BUF_SIZE);
	if (buf == NULL) return -1;
	/* The old header is read only to advance `in` past it. */
	old = bam_header_read(in);
	if (old == NULL) {
		free(buf);
		return -1;
	}
	bam_header_destroy(old);
	fp = bgzf_dopen(fd, "w");
	if (fp == NULL) {
		free(buf);
		return -1;
	}
	bam_header_write(fp, h);
	/* Re-emit whatever part of the current input block follows the old header. */
	if (in->block_offset < in->block_length) {
		bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
		bgzf_flush(fp);
	}
	/* Copy the remaining input straight to the output's underlying file. */
#ifdef _USE_KNETFILE
	while ((len = knet_read(in->fp, buf, BUF_SIZE)) > 0) {
		if (fwrite(buf, 1, len, fp->fp) != (size_t)len) {
			status = -1;
			break;
		}
	}
#else
	while (!feof(in->fp) && (len = fread(buf, 1, BUF_SIZE, in->fp)) > 0) {
		if (fwrite(buf, 1, len, fp->fp) != (size_t)len) {
			status = -1;
			break;
		}
	}
#endif
	free(buf);
	/* Reset both block offsets before closing the writer. */
	fp->block_offset = in->block_offset = 0;
	if (bgzf_close(fp) < 0) status = -1;
	return status;
}

示例#2

0

显示文件

文件： VariantList.cpp 项目： WilliamRichards2017/graphite

	/**
	 * Write the variants of this list, BGZF-compressed, to a file descriptor.
	 *
	 * @param headerPtr   header object; supplies the header text and is passed
	 *                    to each variant when its line is generated
	 * @param printHeader when true, the header text is written before the variants
	 * @param out         file descriptor that receives the compressed output
	 *
	 * If a BGZF writer cannot be created on `out`, nothing is written.
	 */
	void VariantList::printToCompressedVCF(IHeader::SharedPtr headerPtr, bool printHeader, int out)
	{
		BGZF* fp = bgzf_dopen(out, "w");
		if (fp == nullptr)
		{
			// Without a valid handle every write below would dereference null.
			return;
		}
		if (printHeader)
		{
			// Build the header text once; pointer and length come from the same object.
			const auto& header = headerPtr->getHeader();
			bgzf_write(fp, header.c_str(), header.size());
		}
		for (const auto& variantPtr : this->m_variant_ptrs)
		{
			// Generate each variant line once rather than once for c_str() and again for size().
			const auto& line = variantPtr->getVariantLine(headerPtr);
			bgzf_write(fp, line.c_str(), line.size());
		}
		bgzf_close(fp);
	}

示例#3

0

显示文件

文件： nanopolish_read_db.cpp 项目： jts/nanopolish

/// Import reads from a (possibly gzipped) sequence file into the database.
///
/// Every record of `input_filename` is read with kseq. For each read whose
/// name is not yet in the database, its signal path is registered through
/// add_signal_path() and the read is appended as a two-line FASTA record to
/// `out_fasta_filename`, written through BGZF so it can be indexed later.
///
/// @param input_filename      path of the file to read records from
/// @param out_fasta_filename  path of the bgzipped FASTA file to create
///
/// Any failure to open, read or write terminates the process with
/// EXIT_FAILURE after printing a message to stderr.
void ReadDB::import_reads(const std::string& input_filename, const std::string& out_fasta_filename)
{
    // Open readers
    FILE* read_fp = fopen(input_filename.c_str(), "r");
    if(read_fp == NULL) {
        fprintf(stderr, "error: could not open %s for read\n", input_filename.c_str());
        exit(EXIT_FAILURE);
    }

    gzFile gz_read_fp = gzdopen(fileno(read_fp), "r");
    if(gz_read_fp == NULL) {
        fprintf(stderr, "error: could not open %s using gzdopen\n", input_filename.c_str());
        exit(EXIT_FAILURE);
    }

    // Open writers
    FILE* write_fp = fopen(out_fasta_filename.c_str(), "w");
    if(write_fp == NULL) {
        fprintf(stderr, "error: could not open %s for write\n", out_fasta_filename.c_str());
        exit(EXIT_FAILURE);
    }

    BGZF* bgzf_write_fp = bgzf_dopen(fileno(write_fp), "w");
    if(bgzf_write_fp == NULL) {
        fprintf(stderr, "error: could not open %s for bgzipped write\n", out_fasta_filename.c_str());
        exit(EXIT_FAILURE);
    }

    // read input sequences, add to DB and convert to fasta
    int ret = 0;
    kseq_t* seq = kseq_init(gz_read_fp);
    while((ret = kseq_read(seq)) >= 0) {

        // Check for a path to the fast5 file in the comment of the read
        std::string path = "";
        if(seq->comment.l > 0) {

            // This splitting code implicitly handles both the 2 and 3 field
            // fasta format that poretools will output. The FAST5 path
            // is always the last field.
            std::vector<std::string> fields = split(seq->comment.s, ' ');
            if(!fields.empty()) {
                path = fields.back();
            }

            // as a sanity check we require the path name to end in ".fast5"
            if(path.length() < 6 || path.substr(path.length() - 6) != ".fast5") {
                path = "";
            }
        }

        // sanity check that the read does not exist in the database
        // JTS 04/2019: changed error to warning to account for duplicate reads coming out of
        // some versions of guppy.
        auto iter = m_data.find(seq->name.s);
        if(iter != m_data.end()) {
            fprintf(stderr, "Warning: duplicate read name %s found in fasta file\n", seq->name.s);
            continue;
        }

        // add path
        add_signal_path(seq->name.s, path);

        // write sequence in gzipped fasta for fai indexing later
        std::string out_record;
        out_record += ">";
        out_record += seq->name.s;
        out_record += "\n";
        out_record += seq->seq.s;
        out_record += "\n";
        size_t write_length = bgzf_write(bgzf_write_fp, out_record.c_str(), out_record.length());
        if(write_length != out_record.length()) {
            fprintf(stderr, "error in bgzf_write, aborting\n");
            exit(EXIT_FAILURE);
        }
    }

    // check for abnormal exit conditions
    if(ret <= -2) {
        fprintf(stderr, "kseq_read returned %d indicating an error with the input file %s\n", ret, input_filename.c_str());
        exit(EXIT_FAILURE);
    }

    // cleanup
    kseq_destroy(seq);

    // NOTE(review): per the zlib manual gzclose() also closes the descriptor
    // handed to gzdopen(), so the fclose() below presumably only releases the
    // FILE object -- confirm this double close is acceptable here.
    gzclose(gz_read_fp);
    fclose(read_fp);

    // Closing the writer emits the last buffered data; a failure here means
    // the output file is incomplete and must not be reported as success.
    if(bgzf_close(bgzf_write_fp) != 0) {
        fprintf(stderr, "error: could not finish writing %s\n", out_fasta_filename.c_str());
        exit(EXIT_FAILURE);
    }
    fclose(write_fp);
}

示例#4

0

显示文件

文件： bcf_file.cpp 项目： DylanGraham/vcftools

void bcf_file::print_bcf(const parameters &params)
{
	LOG.printLOG("Outputting BCF file...\n");
	BGZF * out;
	if(!params.stream_out)
	{
		string output_file = params.output_prefix + ".recode.bcf";
		out = bgzf_open(output_file.c_str(), "w");
	}
	else
		out = bgzf_dopen(1, "w");

	string header_str;
	uint32_t len_text = 0;
	vector<char> header;

	char magic[5] = {'B','C','F','\2','\2'};
	bgzf_write(out, magic, 5);

	for (unsigned int ui=0; ui<meta_data.lines.size(); ui++)
	{
		for (unsigned int uj=0; uj<meta_data.lines[ui].length(); uj++)
			header.push_back( meta_data.lines[ui][uj] );
		header.push_back('\n');
	}

	header_str = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO";
	if (meta_data.N_indv > 0)
		header_str += "\tFORMAT";

	for (unsigned int ui=0; ui<meta_data.N_indv; ui++)
		if (include_indv[ui])
	{
		header_str += "\t";
		header_str += meta_data.indv[ui];
	}
	header_str += "\n";

	for (unsigned int ui=0; ui<header_str.length(); ui++)
		header.push_back( header_str[ui] );
	header.push_back( '\0' );
	len_text = header.size();

	bgzf_write(out, (char *)&len_text, sizeof(len_text) );
	bgzf_write(out, (char *)&header[0], len_text );
	vector<char> variant_line;
	entry * e = new bcf_entry(meta_data, include_indv);
	while(!eof())
	{
		get_entry(variant_line);
		e->reset(variant_line);
		N_entries += e->apply_filters(params);
		if(!e->passed_filters)
			continue;
		N_kept_entries++;
		e->parse_basic_entry(true, true, true);
		e->parse_full_entry(true);
		e->parse_genotype_entries(true);
		e->print_bcf(out, params.recode_INFO_to_keep, params.recode_all_INFO);
	}
	delete e;
	bgzf_close(out);
}

示例#5

0

显示文件

文件： vcf_file.cpp 项目： kiwiroy/vcftools

void vcf_file::print_bcf(const parameters &params)
{
	LOG.printLOG("Outputting BCF file...\n");
	BGZF * out;
	if(!params.stream_out)
	{
		string output_file = params.output_prefix + ".recode.bcf";
		out = bgzf_open(output_file.c_str(), "w");
	}
	else
		out = bgzf_dopen(1, "w");

	string header_str;
	uint32_t len_text = 0;
	vector<char> header;

	char magic[5] = {'B','C','F','\2', '\1'};
	bgzf_write(out, magic, 5);

	if (meta_data.has_idx)
	{
		LOG.warning("VCF file contains IDX values in header. These are being removed for conversion to BCF.");
		meta_data.reprint();
		meta_data.reparse();
	}
	for (unsigned int ui=0; ui<meta_data.lines.size(); ui++)
	{
		for (unsigned int uj=0; uj<meta_data.lines[ui].length(); uj++)
			header.push_back( meta_data.lines[ui][uj] );
		header.push_back('\n');
	}

	if (meta_data.has_contigs == false)
	{
		vector<string> contig_vector;
		get_contigs(params.contigs_file, contig_vector);

		for(unsigned int ui=0; ui<contig_vector.size(); ui++)
		{
			meta_data.add_CONTIG_descriptor(contig_vector[ui].substr(10, contig_vector[ui].size()-8),int(ui));
			for(unsigned int uj=0; uj<contig_vector[ui].size(); uj++)
				header.push_back(contig_vector[ui][uj]);
			header.push_back('\n');
		}
	}

	header_str = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO";
	if (meta_data.N_indv > 0)
		header_str += "\tFORMAT";

	for (unsigned int ui=0; ui<meta_data.N_indv; ui++)
		if (include_indv[ui])
		{
			header_str += "\t";
			header_str += meta_data.indv[ui];
		}
	header_str += "\n";

	for (unsigned int ui=0; ui<header_str.length(); ui++)
		header.push_back( header_str[ui] );

	header.push_back( '\0' );
	len_text = header.size();

	bgzf_write(out, (char *)&len_text, sizeof(len_text) );
	bgzf_write(out, (char *)&header[0], len_text );

	vector<char> variant_line;
	entry * e = new vcf_entry(meta_data, include_indv);
	while(!eof())
	{
		get_entry(variant_line);
		e->reset(variant_line);
		N_entries += e->apply_filters(params);
		if(!e->passed_filters)
			continue;
		N_kept_entries++;
		e->parse_basic_entry(true, true, true);
		e->parse_full_entry(true);
		e->parse_genotype_entries(true,true,true,true);
		e->print_bcf(out, params.recode_INFO_to_keep, params.recode_all_INFO);
	}
	delete e;
	bgzf_close(out);
}

示例#6

0

显示文件

文件： reheader.c 项目： adeelmahmood/alignmentportal

/*
 * Replace the header of the BGZF-compressed VCF file args->fname and write
 * the result to stdout.
 *
 * The existing header (all leading lines starting with '#') is collected
 * from the first block(s). It is replaced by the contents of
 * args->header_fname when that is set, and its sample names are rewritten
 * from args->samples_fname when that is set. The new header and the rest of
 * the block that held the end of the old header are re-compressed; all
 * remaining blocks are copied without being decompressed.
 *
 * Every failure is reported through error().
 */
static void reheader_vcf_gz(args_t *args)
{
    BGZF *fp = bgzf_open(args->fname,"r");
    if ( !fp || bgzf_read_block(fp) != 0 || !fp->block_length )
        error("Failed to read %s: %s\n", args->fname, strerror(errno));

    kstring_t hdr = {0,0,0};
    char *buffer = (char*) fp->uncompressed_block;

    // Read the header and find the position of the data block
    if ( buffer[0]!='#' ) error("Could not parse the header, expected '#', found '%c'\n", buffer[0]);

    int skip_until = 1;     // end of the header in the current uncompressed block
    while (1)
    {
        if ( buffer[skip_until]=='\n' )
        {
            skip_until++;
            if ( skip_until>=fp->block_length )
            {
                // The line ended exactly at the block boundary: keep the block and load the next one
                kputsn(buffer,skip_until,&hdr);
                if ( bgzf_read_block(fp) != 0 || !fp->block_length ) error("FIXME: No body in the file: %s\n", args->fname);
                skip_until = 0;
            }
            // The header has finished
            if ( buffer[skip_until]!='#' )
            {
                kputsn(buffer,skip_until,&hdr);
                break;
            }
        }
        skip_until++;
        if ( skip_until>=fp->block_length )
        {
            // The header continues into the next block
            kputsn(buffer,fp->block_length,&hdr);
            if (bgzf_read_block(fp) != 0 || !fp->block_length) error("FIXME: No body in the file: %s\n", args->fname);
            skip_until = 0;
        }
    }

    int nsamples = 0;
    char **samples = NULL;
    if ( args->samples_fname )
        samples = hts_readlines(args->samples_fname, &nsamples);
    if ( args->header_fname )
    {
        // A replacement header was supplied: drop the one read from the file
        free(hdr.s); hdr.s = NULL; hdr.l = hdr.m = 0;
        read_header_file(args->header_fname, &hdr);
    }
    if ( samples )
    {
        set_samples(samples, nsamples, &hdr);
        int i;
        for (i=0; i<nsamples; i++) free(samples[i]);
        free(samples);
    }

    // Output the modified header
    BGZF *bgzf_out = bgzf_dopen(fileno(stdout), "w");
    if ( !bgzf_out ) error("Failed to open stdout for BGZF output: %s\n", strerror(errno));
    if ( bgzf_write(bgzf_out, hdr.s, hdr.l)<0 ) error("Error: %d\n",bgzf_out->errcode);
    free(hdr.s);

    // Output all remaining data read with the header block
    if ( fp->block_length - skip_until > 0 )
    {
        // A failed write is recorded on the output handle, not on the input
        if ( bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until)<0 ) error("Error: %d\n",bgzf_out->errcode);
    }
    if ( bgzf_flush(bgzf_out)<0 ) error("Error: %d\n",bgzf_out->errcode);

    // Stream the rest of the file as it is, without decompressing
    ssize_t nread;
    int page_size = getpagesize();
    char *buf = (char*) valloc(page_size);
    if ( !buf ) error("Failed to allocate %d bytes\n", page_size);
    while (1)
    {
        nread = bgzf_raw_read(fp, buf, page_size);
        if ( nread<=0 ) break;

        int count = bgzf_raw_write(bgzf_out, buf, nread);
        if (count != nread) error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread);
    }
    // NOTE(review): these messages read the handle after bgzf_close() has
    // failed; whether the handle is still valid then depends on the library
    // version -- confirm against the bgzf implementation in use.
    if (bgzf_close(bgzf_out) < 0) error("Error: %d\n",bgzf_out->errcode);
    if (bgzf_close(fp) < 0) error("Error: %d\n",fp->errcode);
    free(buf);
}

示例#7

0

显示文件

文件： bam2fq.c 项目： kmsquire/htslib

/*
 * bam2fq: convert a BAM file to FASTQ (or FASTA for reads whose first
 * quality byte is 0xff) on stdout.
 *
 * Usage: bam2fq [-a] [-s outSE] <in.bam>
 *   -a        append /1 and /2 to the read name
 *   -s FILE   write singleton reads to FILE; a record is then held back
 *             until the next record shows whether the read name repeats
 *   <in.bam>  input file, "-" for stdin
 *
 * Secondary alignments are skipped. Reverse-strand reads are written
 * reverse-complemented with their qualities reversed.
 *
 * Returns 0 on success and 1 on a usage error or when a file cannot be
 * opened or memory cannot be allocated.
 */
int main_bam2fq(int argc, char *argv[])
{
    BGZF *fp, *fpse = 0;
    bam1_t *b;
    uint8_t *buf;
    int max_buf, c, has12 = 0;
    kstring_t str;
    int64_t n_singletons = 0, n_reads = 0;
    char last[512], *fnse = 0;

    while ((c = getopt(argc, argv, "as:")) > 0)
        if (c == 'a') has12 = 1;
        else if (c == 's') fnse = optarg;
    if (argc == optind) {
        fprintf(stderr, "\nUsage:   bam2fq [-a] [-s outSE] <in.bam>\n\n");
        fprintf(stderr, "Options: -a        append /1 and /2 to the read name\n");
        fprintf(stderr, "         -s FILE   write singleton reads to FILE [assume single-end]\n");
        fprintf(stderr, "\n");
        return 1;
    }
    fp = strcmp(argv[optind], "-")? bgzf_open(argv[optind], "r") : bgzf_dopen(fileno(stdin), "r");
    /* A missing input is a user error, so report it instead of relying on assert(). */
    if (fp == 0) {
        fprintf(stderr, "[E::%s] failed to open \"%s\" for reading\n", __func__, argv[optind]);
        return 1;
    }
    bam_hdr_destroy(bam_hdr_read(fp)); /* the header is read only to skip past it */
    buf = 0;
    max_buf = 0;
    str.l = str.m = 0;
    str.s = 0;
    last[0] = 0;
    if (fnse) {
        fpse = bgzf_open(fnse, "w1");
        /* Without this check a failed open would silently disable singleton handling. */
        if (fpse == 0) {
            fprintf(stderr, "[E::%s] failed to open \"%s\" for writing\n", __func__, fnse);
            bgzf_close(fp);
            return 1;
        }
    }

    b = bam_init1();
    while (bam_read1(fp, b) >= 0) {
        int i, qlen = b->core.l_qseq, is_print = 0;
        uint8_t *qual, *seq;
        if (b->flag&BAM_FSECONDARY) continue; // skip secondary alignments
        ++n_reads;
        if (fpse) {
            /* A pending record with a different name had no mate: it is a singleton. */
            if (str.l && strcmp(last, bam_get_qname(b))) {
                bgzf_write(fpse, str.s, str.l);
                str.l = 0;
                ++n_singletons;
            }
            /* Still pending means the name matched: print both records together. */
            if (str.l) is_print = 1;
            strcpy(last, bam_get_qname(b));
        } else is_print = 1;
        qual = bam_get_qual(b);
        kputc(qual[0] == 0xff? '>' : '@', &str);
        kputsn(bam_get_qname(b), b->core.l_qname - 1, &str);
        if (has12) {
            kputc('/', &str);
            kputw(b->core.flag>>6&3, &str);
        }
        kputc('\n', &str);
        if (max_buf < qlen + 1) {
            uint8_t *tmp;
            max_buf = qlen + 1;
            kroundup32(max_buf);
            tmp = (uint8_t*)realloc(buf, max_buf);
            if (tmp == 0) {
                fprintf(stderr, "[E::%s] out of memory\n", __func__);
                free(buf);
                free(str.s);
                bam_destroy1(b);
                if (fpse) bgzf_close(fpse);
                bgzf_close(fp);
                return 1;
            }
            buf = tmp;
        }
        buf[qlen] = 0;
        seq = bam_get_seq(b);
        for (i = 0; i < qlen; ++i) buf[i] = bam_seqi(seq, i); // copy the sequence
        if (bam_is_rev(b)) { // reverse complement
            for (i = 0; i < qlen>>1; ++i) {
                int8_t t = seq_comp_table[buf[qlen - 1 - i]];
                buf[qlen - 1 - i] = seq_comp_table[buf[i]];
                buf[i] = t;
            }
            if (qlen&1) buf[i] = seq_comp_table[buf[i]];
        }
        for (i = 0; i < qlen; ++i) buf[i] = seq_nt16_str[buf[i]];
        kputsn((char*)buf, qlen, &str);
        kputc('\n', &str);
        if (qual[0] != 0xff) {
            kputsn("+\n", 2, &str);
            for (i = 0; i < qlen; ++i) buf[i] = 33 + qual[i];
            if (bam_is_rev(b)) { // reverse
                for (i = 0; i < qlen>>1; ++i) {
                    uint8_t t = buf[qlen - 1 - i];
                    buf[qlen - 1 - i] = buf[i];
                    buf[i] = t;
                }
            }
            /* Only FASTQ records have a quality line; writing buf for a
             * record without qualities would repeat the sequence line. */
            kputsn((char*)buf, qlen, &str);
            kputc('\n', &str);
        }
        if (is_print) {
            fwrite(str.s, 1, str.l, stdout);
            str.l = 0;
        }
    }
    if (fpse) {
        /* Whatever is still pending at end of input never found a mate. */
        if (str.l) {
            bgzf_write(fpse, str.s, str.l);
            ++n_singletons;
        }
        fprintf(stderr, "[M::%s] discarded %lld singletons\n", __func__, (long long)n_singletons);
        bgzf_close(fpse);
    }
    fprintf(stderr, "[M::%s] processed %lld reads\n", __func__, (long long)n_reads);
    free(buf);
    free(str.s);
    bam_destroy1(b);
    bgzf_close(fp);
    return 0;
}

示例#8

0

显示文件

文件： Utility.cpp 项目： ste69r/Biokanga

// ClassifyFileType
// Attempt to classify the alignment file as one of CSV, BED or SAM from its initial contents
// (up to sizeof(Buffer)-1 bytes are examined).
// Currently processes CSV, BED and SAM format file types:
//  - a name ending in ".bam" is opened through BGZF; if 100 bytes can be read it is reported as SAM
//  - a name ending in ".gz" is read through gzread, any other file is read directly
//  - the buffered text is scanned line by line and scores are accumulated for each candidate type
// Assumes must be SAM if initial lines have at least one prefixed by a '@' followed by a 2 letter record type code
// Returns eCFTopenerr if the file cannot be opened/read, eCFTlenerr if fewer than cMinFileClassifyLen
// bytes were read, eCFTunknown if no type scored high enough, otherwise eCFTCSV, eCFTBED or eCFTSAM
//
etClassifyFileType
CUtility::ClassifyFileType(char *pszFileName)
{
int hFile;
gzFile gz;
BGZF* pInBGZF;
int BuffLen;
int BuffIdx;
UINT8 Buffer[cFileClassifyBuffLen];
UINT8 *pBuff;
bool bStartNL;
bool bSkipEOL;
UINT8 Chr;
int NumLines;
int FieldCnt;
int TabCnt;
int CommaCnt;
int FldLen;
bool bInQuotes;
int LikelyCSV;
int LikelyBED;
int LikelySAM;
int LikelyNonCSVSAMBED;
bool bSeenSAMHdrs;
int FileNameLen;
bool bGZd;

FileNameLen = (int)strlen(pszFileName);
bGZd = false;
if(FileNameLen >= 4)
	{
	if(!stricmp(&pszFileName[FileNameLen-3],".gz"))
		bGZd = true;
	else
		{
		if(FileNameLen >= 5 && !stricmp(&pszFileName[FileNameLen-4],".bam"))
			{
			hFile = open(pszFileName,O_READSEQ);
			if(hFile == -1)
				return(eCFTopenerr);
			// BAM will be using BGZF compression ..
			if((pInBGZF = bgzf_dopen(hFile, "r"))==NULL)
				{
				gDiagnostics.DiagOut(eDLFatal,gszProcName,"ClassifyFileType: unable to initialise for BGZF processing on file '%s'",pszFileName);
				close(hFile);
				return(eCFTopenerr);
				}
			hFile = -1;

			// try reading the start of the file through BGZF; a short read is treated as not being BAM
			if((BuffLen = (int)bgzf_read(pInBGZF,Buffer,100)) < 100)		// will be < 100 if errors ...
				{
				gDiagnostics.DiagOut(eDLFatal,gszProcName,"ClassifyFileType: Not a BAM format file '%s'",pszFileName);
				bgzf_close(pInBGZF);
				return(eCFTopenerr);
				}
			bgzf_close(pInBGZF);
			return(eCFTSAM);
			}
		}
	}

// now can try to actually open file and read in first cFileClassifyBuffLen chars
if(bGZd)
	{
	gz = gzopen(pszFileName,"rb");
	if(gz == NULL)
		{
		gDiagnostics.DiagOut(eDLFatal,gszProcName,"Open: unable to open for reading gzip'd file '%s'",pszFileName);
		return(eCFTopenerr);
		}
	BuffLen = gzread(gz,Buffer,sizeof(Buffer)-1);
	gzclose(gz);
	}
else
	{
	hFile = open(pszFileName,O_READSEQ);
	if(hFile == -1)
		return(eCFTopenerr);
	// read the start of the file into buffer, leaving room for the terminator
	BuffLen = read(hFile,Buffer,sizeof(Buffer)-1);
	close(hFile);
	}

if(BuffLen < cMinFileClassifyLen)	// an arbitrary lower limit! (also rejects a failed read)
	return(eCFTlenerr);

Buffer[BuffLen] = '\0';				// the scan below stops at the first '\0'
pBuff = Buffer;
NumLines = 0;
LikelyCSV = 0;
LikelyBED = 0;
LikelySAM = 0;
LikelyNonCSVSAMBED = 0;
BuffIdx = 0;
bStartNL = true;
bSeenSAMHdrs = false;
while(Chr = *pBuff++)
	{
	BuffIdx += 1;
	if(bStartNL)						// first char of a new line, reset the per line state
		{
		FieldCnt = 0;
		TabCnt = 0;
		CommaCnt = 0;
		FldLen = 0;
		bInQuotes = false;
		bStartNL = false;
		bSkipEOL = false;
		NumLines += 1;
		}
	if(Chr == '\n' || Chr == '\r')			// if at end of line
		{
		bStartNL = true;
		bSkipEOL = false;
		if(FieldCnt < 3)					// BED can have down to 3 fields, CSV alignment and SAM should have more
			continue;

		if(!bInQuotes)
			{
			if(CommaCnt >= 3 && CommaCnt > TabCnt)		// at least 3 commas and more commas than tabs as field separators then most likely a CSV file
				LikelyCSV += 10;
			else							// otherwise could be either BED or SAM
				{
				if(bSeenSAMHdrs)
					{
					LikelyBED += 5;
					LikelySAM += 10;			// SAM would be distinguished by its header lines starting with '@'
					}
				else
					{
					LikelyBED += 20;
					LikelySAM += 5;
					}
				}
			}
		continue;
		}

	if(bSkipEOL)						// remainder of this line has already been classified
		continue;

	if(!FieldCnt && !FldLen && (Chr == ' ' || Chr == '\t'))		// simply slough all leading whitespaces before initial field starts
		continue;

	// nested quotes are potentially a problem; currently quotes are simply sloughed
	if(Chr == '\'' || Chr == '"')
		{
		bInQuotes = !bInQuotes;
		continue;
		}


	// '@' and '>' are only treated as record markers whilst still in the first field and not within quotes;
	// the parentheses are required because && binds tighter than ||, without them any '>' in a line would match
	if(!FieldCnt && !bInQuotes && (Chr == '@' || Chr == '>'))
		{
		if(Chr == '@')				// if SAM then header line(s) should be present and can be expected to start with "@HD", "@SQ", "@RG", "@PG", "@CO"
			{
			if(BuffIdx <  (BuffLen - 3))
				{
				if(((*pBuff == 'H' && pBuff[1] == 'D') ||
					(*pBuff == 'S' && pBuff[1] == 'Q') ||
					(*pBuff == 'R' && pBuff[1] == 'G') ||
					(*pBuff == 'P' && pBuff[1] == 'G') ||
					(*pBuff == 'C' && pBuff[1] == 'O')) &&
					(pBuff[2] == ' ' || pBuff[2] == '\t' ))
					{
					bSeenSAMHdrs = true;
					LikelyNonCSVSAMBED = 0;
					LikelySAM += 10000;
					bSkipEOL = true;
					continue;
					}
				else
					{
					if(!bSeenSAMHdrs)					// if no SAM headers parsed then could easily be a fastq...
						{
						LikelyNonCSVSAMBED += 50;
						bSkipEOL = true;
						continue;
						}
					}
				}
			}
		if(Chr == '>')									// if at start of line then could easily be fasta...
			LikelyNonCSVSAMBED += 50;
		}

	switch(Chr) {
		case ' ':			// simply slough spaces
			continue;

		case ',':			// if comma then likely is a csv, but could still be BED if in optional fields 9 (itemRgb) onwards
			if(TabCnt < 8 && FieldCnt >= TabCnt)
				{
				FieldCnt += 1;
				CommaCnt += 1;
				FldLen = 0;
				}
			break;

		case '\t':			// tabs are in BED and SAM as field separators, but could also be present in CSV as spacers
			if(CommaCnt < 3 && FieldCnt >= CommaCnt)
				{
				FieldCnt += 1;
				TabCnt += 1;
				FldLen = 0;
				}
			break;

		default:			// any other char is assumed to be part of an actual field value
			FldLen += 1;
			break;
		}
	}

// too much fasta/fastq like evidence, or too little evidence for any of the supported types
if(LikelyNonCSVSAMBED >= 250 || (LikelyCSV < 10 && LikelyBED < 10 && LikelySAM < 500))
	return(eCFTunknown);

// highest score wins; ties resolve as CSV, then BED, then SAM
if(LikelyCSV >= LikelyBED && LikelyCSV >= LikelySAM)
	return(eCFTCSV);
if(LikelyBED >= LikelySAM)
	return(eCFTBED);
return(eCFTSAM);
}

示例#9

0

显示文件

文件： main.c 项目： xied75/tabix

/*
 * Replace the header of the BGZF-compressed file `file` with the contents
 * of the text file `header` and write the result to stdout.
 *
 * header  path of the text file holding the new header
 * file    path of the BGZF file whose header is replaced
 * meta    character that marks a header line in `file`
 *
 * If the first line of `file` starts with `meta`, all leading lines that
 * start with it are skipped. The new header and the rest of the block that
 * held the end of the old header are re-compressed; the remaining input is
 * copied through as raw bytes.
 *
 * Returns 0 on success and -1 if `file` cannot be opened or its first block
 * cannot be read. Other failures are reported through error().
 */
int reheader_file(const char *header, const char *file, int meta)
{
    char *buffer;
    int skip_until = 0;
    FILE *fh;
    int page_size;
    char *buf;
    BGZF *bgzf_out;
    ssize_t nread;
    BGZF *fp = bgzf_open(file,"r");
    if (fp == NULL)
        return -1;
    if (bgzf_read_block(fp) != 0 || !fp->block_length)
    {
        bgzf_close(fp);
        return -1;
    }
    
    buffer = fp->uncompressed_block;
    
    if ( buffer[0]==meta )
    {
        skip_until = 1;

        // Skip the header
        while (1)
        {
            if ( buffer[skip_until]=='\n' )
            {
                skip_until++;
                if ( skip_until>=fp->block_length )
                {
                    // The line ended exactly at the block boundary: load the next block
                    if (bgzf_read_block(fp) != 0 || !fp->block_length)
                        error("no body?\n");
                    skip_until = 0;
                }
                // The header has finished
                if ( buffer[skip_until]!=meta ) break;
            }
            skip_until++;
            if ( skip_until>=fp->block_length )
            {
                // The header continues into the next block
                if (bgzf_read_block(fp) != 0 || !fp->block_length)
                    error("no body?\n");
                skip_until = 0;
            }
        }
    }

    fh = fopen(header,"r");
    if ( !fh )
        error("%s: %s", header,strerror(errno));
    page_size = getpagesize();
    buf = malloc(page_size); //Dong Code
    if ( !buf )
        error("Could not allocate %d bytes\n", page_size);
    bgzf_out = bgzf_dopen(fileno(stdout), "w");
    if ( !bgzf_out )
        error("Could not open stdout for BGZF output\n");
    // Read one byte less than the buffer holds so a newline can be appended
    while ( (nread=fread(buf,1,page_size-1,fh))>0 )
    {
        // A short read that does not end in a newline gets one appended
        if ( nread<page_size-1 && buf[nread-1]!='\n' )
            buf[nread++] = '\n';
        if (bgzf_write(bgzf_out, buf, nread) < 0) error("Error: %d\n",bgzf_out->errcode);
    }
    fclose(fh);

    // Re-emit the part of the current block that follows the old header
    if ( fp->block_length - skip_until > 0 )
    {
        // A failed write is recorded on the output handle, not on the input
        if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0) 
            error("Error: %d\n",bgzf_out->errcode);
    }
    if (bgzf_flush(bgzf_out) < 0) 
        error("Error: %d\n",bgzf_out->errcode);

    // Copy the remaining input as raw bytes
    while (1)
    {
        int count;
#ifdef _USE_KNETFILE
        nread = knet_read(fp->fp, buf, page_size);
#else
        nread = fread(buf, 1, page_size, fp->fp);
#endif
        if ( nread<=0 ) 
            break;

        count = fwrite(buf, 1, nread, bgzf_out->fp);
        if (count != nread)
            error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread);
    }

    if (bgzf_close(bgzf_out) < 0) 
        error("Error: %d\n",bgzf_out->errcode);

    // Release the copy buffer and the input handle, which were previously leaked
    free(buf);
    bgzf_close(fp);
   
    return 0;
}