Example #1
0
/*
  Switch the count buffers over to reference sequence 'newRefId'.

  If a sequence is currently active (curChr!=-1) its record is first
  appended to the BGZF stream 'fbin' as
     [int name length][name bytes][int len][4 arrays of len bytes]
  and a matching entry
     [int name length][name bytes][int len][int64 offset from bgzf_tell]
  is appended to the index file 'fidx'.

  Afterwards the four count arrays are reallocated to the length of the
  new sequence (header->l_ref[newRefId]) and zero-filled.
  Does nothing when doSmartCounts is 0.
*/
void abcSmartCounts::changeChr(int newRefId){
  if(!doSmartCounts)
    return;

  if(curChr!=-1){
    //offset of this record must be taken before anything is written
    const int64_t recordStart = bgzf_tell(fbin);
    int nameLen = strlen(header->name[curChr]);

    //data record
    bgzf_write(fbin,&nameLen,sizeof(int));
    bgzf_write(fbin,header->name[curChr],nameLen);
    bgzf_write(fbin,&len,sizeof(int));
    for(int base=0;base<4;base++){
      bgzf_write(fbin,counts[base],len);//one array of len bytes
    }

    //index record
    fprintf(stderr,"Writing index for chr: %s\n",header->name[curChr]);
    fwrite(&nameLen,sizeof(int),1,fidx);
    fwrite(header->name[curChr],sizeof(char),nameLen,fidx);
    fwrite(&len,sizeof(int),1,fidx);
    fwrite(&recordStart,sizeof(int64_t),1,fidx);
  }

  //start over with empty buffers sized for the new sequence
  curChr = newRefId;
  len = header->l_ref[curChr];
  for(int base=0;base<4;base++){
    delete [] counts[base];
    counts[base] = new unsigned char[len];
    memset(counts[base],0,len);
  }
}
Example #2
0
/*
  Write all sites of one chromosome to the BGZF stream 'fp'.

  Layout written:
     [size_t name length][name bytes][size_t nSites]
     [nSites ints: positions]
     [5 arrays of nSites floats: thetas[i].vals[0..4], one array per statistic]

  thetas : per-site entries; the 'vals' array of every entry is released
           with delete[] by this function and its pointer is reset to NULL.
  chr    : NUL-terminated chromosome name.
  fp     : open BGZF handle.

  Returns the value of bgzf_tell(fp) taken before anything was written.
*/
int64_t writeAll(std::vector<the_t> &thetas, char *chr,BGZF *fp){
  const size_t nSites = thetas.size();
  fprintf(stderr,"\tWriting: chr:%s with nSites:%zu\n",chr,nSites);
  int64_t retVal =bgzf_tell(fp);
  size_t clen=strlen(chr);
  bgzf_write(fp,&clen,sizeof(size_t));//write len of chr
  bgzf_write(fp,chr,clen);//write chr
  size_t vLen = nSites;
  bgzf_write(fp,&vLen,sizeof(size_t));//write len of positions;

  int *posi = new int[nSites];
  //local table of column pointers; the previous function-static heap array
  //was never released and was shared between all calls
  float *the[5];
  for(int j=0;j<5;j++)
    the[j] = new float[nSites];

  //transpose site-major input into one contiguous array per statistic
  for(size_t i=0;i<nSites;i++){
    posi[i] =thetas[i].posi;
    for(int j=0;j<5;j++)
      the[j][i] = thetas[i].vals[j];
    delete [] thetas[i].vals;
    thetas[i].vals = NULL;//don't leave a dangling pointer behind
  }

  bgzf_write(fp,posi,sizeof(int)*nSites);
  for(int j=0;j<5;j++){
    bgzf_write(fp,the[j],sizeof(float)*nSites);
    delete [] the[j];
  }

  delete [] posi;
  fprintf(stderr,"\tDone writing: %s\n",chr);
  return retVal;
}
Example #3
0
/*
  Flush the counts of the sequence that is still active, release the count
  buffers and close both output files.

  The record and index layout is the same one changeChr() writes:
     fbin: [int name length][name bytes][int len][4 arrays of len bytes]
     fidx: [int name length][name bytes][int len][int64 offset]

  Nothing is done when doSmartCounts is 0.
*/
abcSmartCounts::~abcSmartCounts(){

  if(doSmartCounts==0)
    return;

  //curChr==-1 means no sequence was ever selected (same sentinel changeChr
  //tests); indexing header->name with it would read out of bounds
  if(curChr!=-1){
    int64_t retVal =bgzf_tell(fbin);
    int clen = strlen(header->name[curChr]);
    bgzf_write(fbin,&clen,sizeof(int));
    bgzf_write(fbin,header->name[curChr],clen);
    bgzf_write(fbin,&len,sizeof(int));
    for(int i=0;i<4;i++)
      bgzf_write(fbin,counts[i],len);//one array of len bytes

    //write index stuff
    fwrite(&clen,sizeof(int),1,fidx);
    fwrite(header->name[curChr],sizeof(char),clen,fidx);
    fwrite(&len,sizeof(int),1,fidx);
    fwrite(&retVal,sizeof(int64_t),1,fidx);
  }

  for(int i=0;i<4;i++)
    delete [] counts[i];
  delete [] counts;

  fclose(fidx);
  bgzf_close(fbin);

}
	/**
	 * Writes the variants of this list, BGZF-compressed, to an already open
	 * file descriptor.
	 *
	 * @param headerPtr   header used to render each variant line; its text is
	 *                    written first when printHeader is true
	 * @param printHeader whether to emit the header text before the variants
	 * @param out         file descriptor handed to bgzf_dopen(); the BGZF
	 *                    handle created on it is closed before returning
	 */
	void VariantList::printToCompressedVCF(IHeader::SharedPtr headerPtr, bool printHeader, int out)
	{
		BGZF* fp = bgzf_dopen(out, "w");
		if (printHeader)
		{
			// Fetch the header text once instead of once for the data and once for the size.
			const auto& headerText = headerPtr->getHeader();
			bgzf_write(fp, headerText.c_str(), headerText.size());
		}
		// Iterate by reference so the pointer object is not copied for every variant.
		for (const auto& variantPtr : this->m_variant_ptrs)
		{
			// Render each line exactly once; it was previously built twice per variant.
			const auto& line = variantPtr->getVariantLine(headerPtr);
			bgzf_write(fp, line.c_str(), line.size());
		}
		bgzf_close(fp);
	}
Example #5
0
File: vcf.c Project: goshng/cocoa
/*
 * Write one BCF record to a BGZF stream.
 *
 * A 32-byte fixed part (eight 32-bit words) is written first, followed by the
 * 'shared' and 'indiv' byte buffers of the record:
 *   x[0]    length of the shared block plus 24 (six 32-bit integers)
 *   x[1]    length of the indiv block
 *   x[2..5] the first 16 bytes of *v, copied verbatim
 *   x[6]    n_allele in the upper 16 bits, n_info in the lower bits
 *   x[7]    n_fmt in the upper 8 bits, n_sample in the lower bits
 *
 * Returns 0 on success and -1 as soon as any bgzf_write() call reports an
 * error (previously write failures were ignored and 0 was always returned).
 */
int bcf_write1(BGZF *fp, const bcf1_t *v)
{
	uint32_t x[8];
	x[0] = v->shared.l + 24; // to include six 32-bit integers
	x[1] = v->indiv.l;
	memcpy(x + 2, v, 16);
	x[6] = (uint32_t)v->n_allele<<16 | v->n_info;
	x[7] = (uint32_t)v->n_fmt<<24 | v->n_sample;
	if (bgzf_write(fp, x, 32) < 0) return -1;
	if (bgzf_write(fp, v->shared.s, v->shared.l) < 0) return -1;
	if (bgzf_write(fp, v->indiv.s, v->indiv.l) < 0) return -1;
	return 0;
}
Example #6
0
/*
 * Copy the BAM stream 'in' to file descriptor 'fd', replacing its header
 * with 'h'.
 *
 * The old header is read from 'in' only to advance past it and is then
 * discarded. Whatever is left in the current uncompressed block is
 * re-compressed behind the new header; every following block is copied in
 * its raw compressed form.
 *
 * Returns 0 on success, -1 if 'in' is not open for reading, if memory cannot
 * be allocated, if the old header cannot be read or if the output handle
 * cannot be created.
 */
int bam_reheader(BGZF *in, const bam_header_t *h, int fd)
{
	BGZF *fp;
	bam_header_t *old;
	int len;
	uint8_t *buf;
	if (in->open_mode != 'r') return -1;
	buf = malloc(BUF_SIZE);
	if (buf == NULL) return -1;
	old = bam_header_read(in);
	if (old == NULL) { // nothing sensible can be copied without a valid header
		free(buf);
		return -1;
	}
	bam_header_destroy(old); // only needed to skip over it; was leaked before
	fp = bgzf_dopen(fd, "w");
	if (fp == NULL) {
		free(buf);
		return -1;
	}
	bam_header_write(fp, h);
	if (in->block_offset < in->block_length) {
		// remainder of the block that also contained the end of the old header
		bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
		bgzf_flush(fp);
	}
	// the rest is copied as compressed bytes on the underlying file handles
#ifdef _USE_KNETFILE
	while ((len = knet_read(in->fp, buf, BUF_SIZE)) > 0)
		fwrite(buf, 1, len, fp->fp);
#else
	while (!feof(in->fp) && (len = fread(buf, 1, BUF_SIZE, in->fp)) > 0)
		fwrite(buf, 1, len, fp->fp);
#endif
	free(buf);
	// mark both buffers as empty so that closing does not emit or reuse stale data
	fp->block_offset = in->block_offset = 0;
	bgzf_close(fp);
	return 0;
}
Example #7
0
/*
  Write the base counts of every kept site of this chunk to 'outfile'.

  For each site s with pars->keepSites[s]!=0 the key "<target name> <pos+1>"
  is looked up in the map 'am'. Sites that are missing from the map, that
  have any of the four counts above 3, or whose four counts are all zero are
  skipped. For the remaining sites one 'counts' struct (relative position
  from the map plus A,C,G,T counts) is appended to the BGZF stream.

  Does nothing when doScounts is 0.
*/
void abcScounts::print(funkyPars *pars){
  if(doScounts==0)
    return;
  for(int s=0;s<pars->numSites;s++){
    if(pars->keepSites[s]==0)
      continue;

    char tmpname[1024];
    //bounded formatting; a long target name can no longer overrun the buffer
    snprintf(tmpname,sizeof(tmpname),"%s %d",header->target_name[pars->refId],pars->posi[s]+1);
    aMap::iterator it = am.find(tmpname);
    if(it==am.end()){
      fprintf(stderr,"\t-> problem finding site: %s\n",tmpname);
      continue;
    }
    if(pars->counts[s][0]>3||pars->counts[s][1]>3||pars->counts[s][2]>3||pars->counts[s][3]>3){
      fprintf(stderr,"\t-> skipping posi tmpname:%s du to depth>3\n",tmpname);
      continue;
    }
    //the last term used to be pars->counts[3] (a row pointer, not a count),
    //which made this test always false
    if(pars->counts[s][0]+pars->counts[s][1]+pars->counts[s][2]+pars->counts[s][3]==0)
      continue;

    counts cnts;
    cnts.rel_pos = it->second;
    // fprintf(stderr,"realpos: %d rel_pos:%d\n",pars->posi[s]+1,it->second);
    cnts.A = pars->counts[s][0];
    cnts.C = pars->counts[s][1];
    cnts.G = pars->counts[s][2];
    cnts.T = pars->counts[s][3];

    //the write must not live inside assert(): with NDEBUG it would be compiled out
    const long nWritten = bgzf_write(outfile,&cnts,sizeof(counts)*1);
    assert(nWritten==(long)sizeof(counts));
    (void)nWritten;
  }

}
void BAMbinSortByCoordinate(uint32 iBin, uint binN, uint binS, uint nThreads, string dirBAMsort, Parameters *P) {
    //Sorts the records of one bin and writes them, with a header, to <dirBAMsort>/b<iBin>.
    //  iBin     - bin number, used in the input and output file names
    //  binN     - number of records in the bin
    //  binS     - expected total number of bytes in the bin
    //  nThreads - number of per-thread input files <dirBAMsort><thread>/<iBin>
    //The per-thread input files are deleted after they have been read.

    //an empty bin produces no output
    if (binS==0) {
        return;
    }

    //raw bytes of the whole bin, and three uint per record: {sort key, read order, byte offset}
    char *binData=new char[binS];
    uint *recIndex=new uint[binN*3];

    //concatenate the pieces written by each thread
    uint bytesLoaded=0;
    for (uint iThread=0; iThread<nThreads; iThread++) {
        const string pieceName=dirBAMsort+to_string(iThread)+"/"+to_string((uint) iBin);
        ifstream pieceStream (pieceName.c_str());
        pieceStream.read(binData+bytesLoaded,binS);//read the whole file
        bytesLoaded += pieceStream.gcount();
        pieceStream.close();
        remove(pieceName.c_str());
    }

    if (bytesLoaded!=binS) {
        ostringstream errOut;
        errOut << "EXITING because of FATAL ERROR: number of bytes expected from the BAM bin does not agree with the actual size on disk: "
               << binS <<"   "<< bytesLoaded <<"   "<< iBin <<"\n";
        exitWithError(errOut.str(),std::cerr, P->inOut->logMain, 1, *P);
    }

    //walk the buffer record by record and fill the index
    uint offset=0;
    for (uint iRec=0; iRec<binN; iRec++) {
        uint *entry=recIndex+iRec*3;
        uint32 *rec32=(uint32*) (binData+offset);

        //key: the two 32-bit words that follow the size word (the coordinates)
        entry[0]=( ((uint) rec32[1]) << 32) | ( (uint)rec32[2] );
        entry[2]=offset;

        //the stored size does not count the size word itself
        offset+=rec32[0]+sizeof(uint32);

        //each record is followed by a uint holding its read order
        entry[1]=*( (uint*) (binData+offset) );
        offset+=sizeof(uint);
    }

    qsort((void*) recIndex, binN, sizeof(uint)*3, funCompareUint2);

    //write header and records in sorted order
    const string outName=dirBAMsort+"/b"+to_string((uint) iBin);
    const string outMode="w"+to_string((long long) P->outBAMcompression);
    BGZF *bgzfBin=bgzf_open(outName.c_str(),outMode.c_str());
    outBAMwriteHeader(bgzfBin,P->samHeaderSortedCoord,P->chrName,P->chrLength);

    for (uint iRec=0; iRec<binN; iRec++) {
        char *rec=binData+recIndex[iRec*3+2];
        bgzf_write(bgzfBin,rec, *((uint32*) rec)+sizeof(uint32) );
    }

    bgzf_flush(bgzfBin);
    bgzf_close(bgzfBin);

    delete [] binData;
    delete [] recIndex;
}
Example #9
0
void BAMoutput::unsortedOneAlign (char *bamIn, uint bamSize, uint bamSize2) {
    //Append one alignment (bamSize bytes at bamIn) to the in-memory buffer.
    //If the buffered bytes plus bamSize2 would exceed the buffer capacity,
    //the buffer is first written to the BGZF output and emptied.

    const bool needFlush = (binBytes1+bamSize2 > bamArraySize);

    if (needFlush) {
        //the output handle is shared between threads: take the lock only in threaded runs
        if (g_threadChunks.threadBool) {
            pthread_mutex_lock(&g_threadChunks.mutexOutSAM);
        }

        bgzf_write(bgzfBAM,bamArray,binBytes1);

        if (g_threadChunks.threadBool) {
            pthread_mutex_unlock(&g_threadChunks.mutexOutSAM);
        }

        binBytes1=0;//buffer is empty again
    }

    memcpy(bamArray+binBytes1, bamIn, bamSize);
    binBytes1 += bamSize;
}
Example #10
0
void outBAMwriteHeader (BGZF* fp, const string &samh, const vector <string> &chrn, const vector <uint> &chrl) {
    //Writes a binary header to fp and flushes it:
    //  magic "BAM\1", int32 text length, header text,
    //  int32 number of references, then per reference:
    //  int32 name length (including the terminating \0), name with \0, int32 sequence length

    bgzf_write(fp,"BAM\001",4);

    int32 textLen=samh.size();
    bgzf_write(fp,(char*) &textLen,sizeof(textLen));
    bgzf_write(fp,samh.c_str(),textLen);

    int32 nRef=(int32) chrn.size();
    bgzf_write(fp,(char*) &nRef,sizeof(nRef));

    for (int32 iRef=0; iRef<nRef; iRef++) {
        const string &refName=chrn.at(iRef);
        int32 nameLen=(int32) (refName.size()+1);
        int32 seqLen=(int32) chrl[iRef];

        bgzf_write(fp,(char*) &nameLen,sizeof(nameLen));
        bgzf_write(fp,refName.data(),nameLen); //the extra byte is the \0 at the end of the string
        bgzf_write(fp,(char*) &seqLen,sizeof(seqLen));
    }

    bgzf_flush(fp);
}
Example #11
0
/*
 * Reads a BAM stream from 'in' and writes a new BAM file to 'fd' with 'h'
 * as its header. No checks are made on the validity of the new header.
 *
 * If add_PG is non-zero, a "samtools" @PG line (VN = samtools_version(),
 * plus CL = arg_list when arg_list is not NULL) is added to h->text before
 * it is written; h->text and h->l_text are replaced in that case.
 *
 * Returns 0 on success and -1 on failure. All temporary resources are
 * released on every path; 'fd' is only wrapped once the header is ready.
 */
int bam_reheader(BGZF *in, bam_hdr_t *h, int fd,
                 const char *arg_list, int add_PG)
{
    BGZF *fp;
    bam_hdr_t *old;
    ssize_t len;
    uint8_t *buf;
    if (in->is_write) return -1;
    buf = malloc(BUF_SIZE);
    if (buf == NULL) {
        fprintf(stderr, "Couldn't allocate copy buffer\n");
        return -1;
    }
    old = bam_hdr_read(in);
    if (old == NULL) {
        fprintf(stderr, "Couldn't read header\n");
        free(buf);
        return -1;
    }
    // The old header is read only to move past it; it used to be leaked.
    bam_hdr_destroy(old);

    if (add_PG) {
        // Around the houses, but it'll do until we can manipulate bam_hdr_t natively.
        SAM_hdr *sh = sam_hdr_parse_(h->text, h->l_text);
        char *new_text;
        if (sh == NULL) {
            free(buf);
            return -1;
        }
        if (sam_hdr_add_PG(sh, "samtools",
                           "VN", samtools_version(),
                           arg_list ? "CL": NULL,
                           arg_list ? arg_list : NULL,
                           NULL) != 0) {
            sam_hdr_free(sh);
            free(buf);
            return -1;
        }

        // Duplicate first so that 'h' keeps its old text if the copy fails.
        new_text = strdup(sam_hdr_str(sh));
        if (!new_text) {
            sam_hdr_free(sh);
            free(buf);
            return -1;
        }
        free(h->text);
        h->text = new_text;
        h->l_text = sam_hdr_length(sh);
        sam_hdr_free(sh);
    }

    // Opened after the header is final so that no failure above leaks the handle.
    fp = bgzf_fdopen(fd, "w");
    if (fp == NULL) {
        free(buf);
        return -1;
    }

    bam_hdr_write(fp, h);
    if (in->block_offset < in->block_length) {
        // Rest of the block that also held the end of the old header.
        bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
        bgzf_flush(fp);
    }
    // Everything after that is copied in compressed form.
    while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0)
        bgzf_raw_write(fp, buf, len);
    free(buf);
    // Mark both block buffers as empty before closing.
    fp->block_offset = in->block_offset = 0;
    bgzf_close(fp);
    return 0;
}
Example #12
0
/*
 * Copy the BAM stream 'in' to file descriptor 'fd', replacing its header
 * with 'h'.
 *
 * The old header is read from 'in' only to advance past it and is then
 * discarded. Whatever is left in the current uncompressed block is
 * re-compressed behind the new header; the remaining blocks are copied raw.
 *
 * Returns 0 on success, -1 if 'in' is open for writing, if memory cannot be
 * allocated, if the old header cannot be read or if the output handle cannot
 * be created.
 */
int bam_reheader(BGZF *in, const bam_header_t *h, int fd)
{
    BGZF *fp;
    bam_header_t *old;
    ssize_t len;
    uint8_t *buf;
    if (in->is_write) return -1;
    buf = malloc(BUF_SIZE);
    if (buf == NULL) return -1;
    old = bam_header_read(in);
    if (old == NULL) { // nothing sensible can be copied without a valid header
        free(buf);
        return -1;
    }
    bam_header_destroy(old); // only needed to skip over it; was leaked before
    fp = bgzf_fdopen(fd, "w");
    if (fp == NULL) {
        free(buf);
        return -1;
    }
    bam_header_write(fp, h);
    if (in->block_offset < in->block_length) {
        // remainder of the block that also contained the end of the old header
        bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
        bgzf_flush(fp);
    }
    // the rest is copied as compressed bytes
    while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0)
        bgzf_raw_write(fp, buf, len);
    free(buf);
    // mark both block buffers as empty before closing
    fp->block_offset = in->block_offset = 0;
    bgzf_close(fp);
    return 0;
}
Example #13
0
File: bgzip.c Project: Illumina/akt
int main(int argc, char **argv)
{
    int c, compress, pstdout, is_forced, index = 0, rebgzip = 0, reindex = 0;
    BGZF *fp;
    void *buffer;
    long start, end, size;
    char *index_fname = NULL;
    int threads = 1;

    static const struct option loptions[] =
    {
        {"help", no_argument, NULL, 'h'},
        {"offset", required_argument, NULL, 'b'},
        {"stdout", no_argument, NULL, 'c'},
        {"decompress", no_argument, NULL, 'd'},
        {"force", no_argument, NULL, 'f'},
        {"index", no_argument, NULL, 'i'},
        {"index-name", required_argument, NULL, 'I'},
        {"reindex", no_argument, NULL, 'r'},
        {"rebgzip",no_argument,NULL,'g'},
        {"size", required_argument, NULL, 's'},
        {"threads", required_argument, NULL, '@'},
        {"version", no_argument, NULL, 1},
        {NULL, 0, NULL, 0}
    };

    compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0;
    while((c  = getopt_long(argc, argv, "cdh?fb:@:s:iI:gr",loptions,NULL)) >= 0){
        switch(c){
        case 'd': compress = 0; break;
        case 'c': pstdout = 1; break;
        case 'b': start = atol(optarg); compress = 0; pstdout = 1; break;
        case 's': size = atol(optarg); pstdout = 1; break;
        case 'f': is_forced = 1; break;
        case 'i': index = 1; break;
        case 'I': index_fname = optarg; break;
        case 'g': rebgzip = 1; break;
        case 'r': reindex = 1; compress = 0; break;
        case '@': threads = atoi(optarg); break;
        case 1:
            printf(
"bgzip (htslib) %s\n"
"Copyright (C) 2017 Genome Research Ltd.\n", hts_version());
            return EXIT_SUCCESS;
        case 'h':
        case '?': return bgzip_main_usage();
        }
    }
    if (size >= 0) end = start + size;
    if (end >= 0 && end < start) {
        fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end);
        return 1;
    }
    if (compress == 1) {
        struct stat sbuf;
        int f_src = fileno(stdin);

        if ( argc>optind )
        {
            if ( stat(argv[optind],&sbuf)<0 )
            {
                fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
                return 1;
            }

            if ((f_src = open(argv[optind], O_RDONLY)) < 0) {
                fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
                return 1;
            }

            if (pstdout)
                fp = bgzf_open("-", "w");
            else
            {
                char *name = malloc(strlen(argv[optind]) + 5);
                strcpy(name, argv[optind]);
                strcat(name, ".gz");
                fp = bgzf_open(name, is_forced? "w" : "wx");
                if (fp == NULL && errno == EEXIST && confirm_overwrite(name))
                    fp = bgzf_open(name, "w");
                if (fp == NULL) {
                    fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno));
                    free(name);
                    return 1;
                }
                free(name);
            }
        }
        else if (!pstdout && isatty(fileno((FILE *)stdout)) )
            return bgzip_main_usage();
        else if ( index && !index_fname )
        {
            fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n");
            return 1;
        }
        else
            fp = bgzf_open("-", "w");

        if ( index && rebgzip )
        {
            fprintf(stderr, "[bgzip] Can't produce a index and rebgzip simultaneously\n");
            return 1;
        }

        if ( rebgzip && !index_fname )
        {
            fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n");
            return 1;
        }

        if (threads > 1)
            bgzf_mt(fp, threads, 256);

        if ( index ) bgzf_index_build_init(fp);
        buffer = malloc(WINDOW_SIZE);
#ifdef _WIN32
        _setmode(f_src, O_BINARY);
#endif
        if (rebgzip){
            if ( bgzf_index_load(fp, index_fname, NULL) < 0 ) error("Could not load index: %s.gzi\n", argv[optind]);

            while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0)
                if (bgzf_block_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode);
        }
        else {
            while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0)
                if (bgzf_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode);
        }
        if ( index )
        {
            if (index_fname) {
                if (bgzf_index_dump(fp, index_fname, NULL) < 0)
                    error("Could not write index to '%s'\n", index_fname);
            } else {
                if (bgzf_index_dump(fp, argv[optind], ".gz.gzi") < 0)
                    error("Could not write index to '%s.gz.gzi'", argv[optind]);
            }
        }
        if (bgzf_close(fp) < 0) error("Close failed: Error %d", fp->errcode);
        if (argc > optind && !pstdout) unlink(argv[optind]);
        free(buffer);
        close(f_src);
        return 0;
    }
    else if ( reindex )
    {
        if ( argc>optind )
        {
            fp = bgzf_open(argv[optind], "r");
            if ( !fp ) error("[bgzip] Could not open file: %s\n", argv[optind]);
        }
        else
        {
            if ( !index_fname ) error("[bgzip] Index file name expected when reading from stdin\n");
            fp = bgzf_open("-", "r");
            if ( !fp ) error("[bgzip] Could not read from stdin: %s\n", strerror(errno));
        }

        buffer = malloc(BGZF_BLOCK_SIZE);
        bgzf_index_build_init(fp);
        int ret;
        while ( (ret=bgzf_read(fp, buffer, BGZF_BLOCK_SIZE))>0 ) ;
        free(buffer);
        if ( ret<0 ) error("Is the file gzipped or bgzipped? The latter is required for indexing.\n");

        if ( index_fname ) {
            if (bgzf_index_dump(fp, index_fname, NULL) < 0)
                error("Could not write index to '%s'\n", index_fname);
        } else {
            if (bgzf_index_dump(fp, argv[optind], ".gzi") < 0)
                error("Could not write index to '%s.gzi'\n", argv[optind]);
        }

        if ( bgzf_close(fp)<0 ) error("Close failed: Error %d\n",fp->errcode);
        return 0;
    }
    else
    {
        struct stat sbuf;
        int f_dst;

        if ( argc>optind )
        {
            if ( stat(argv[optind],&sbuf)<0 )
            {
                fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
                return 1;
            }
            char *name;
            int len = strlen(argv[optind]);
            if ( strcmp(argv[optind]+len-3,".gz") )
            {
                fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]);
                return 1;
            }
            fp = bgzf_open(argv[optind], "r");
            if (fp == NULL) {
                fprintf(stderr, "[bgzip] Could not open file: %s\n", argv[optind]);
                return 1;
            }

            if (pstdout) {
                f_dst = fileno(stdout);
            }
            else {
                const int wrflags = O_WRONLY | O_CREAT | O_TRUNC;
                name = strdup(argv[optind]);
                name[strlen(name) - 3] = '\0';
                f_dst = open(name, is_forced? wrflags : wrflags|O_EXCL, 0666);
                if (f_dst < 0 && errno == EEXIST && confirm_overwrite(name))
                    f_dst = open(name, wrflags, 0666);
                if (f_dst < 0) {
                    fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno));
                    free(name);
                    return 1;
                }
                free(name);
            }
        }
        else if (!pstdout && isatty(fileno((FILE *)stdin)) )
            return bgzip_main_usage();
        else
        {
            f_dst = fileno(stdout);
            fp = bgzf_open("-", "r");
            if (fp == NULL) {
                fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno));
                return 1;
            }
        }
        if (threads > 1)
            bgzf_mt(fp, threads, 256);

        buffer = malloc(WINDOW_SIZE);
        if ( start>0 )
        {
            if ( bgzf_index_load(fp, argv[optind], ".gzi") < 0 ) error("Could not load index: %s.gzi\n", argv[optind]);
            if ( bgzf_useek(fp, start, SEEK_SET) < 0 ) error("Could not seek to %d-th (uncompressd) byte\n", start);
        }
#ifdef _WIN32
        _setmode(f_dst, O_BINARY);
#endif
        while (1) {
            if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE);
            else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
            if (c == 0) break;
            if (c < 0) error("Could not read %d bytes: Error %d\n", (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start), fp->errcode);
            start += c;
            if ( write(f_dst, buffer, c) != c ) {
#ifdef _WIN32
                if (GetLastError() != ERROR_NO_DATA)
#endif
                error("Could not write %d bytes\n", c);
            }
            if (end >= 0 && start >= end) break;
        }
        free(buffer);
        if (bgzf_close(fp) < 0) error("Close failed: Error %d\n",fp->errcode);
        if (!pstdout) unlink(argv[optind]);
        return 0;
    }
}
Example #14
0
File: vcf.c Project: goshng/cocoa
/*
 * Write a BCF header to a BGZF stream: the 5-byte magic "BCF\2\1", then the
 * first 4 bytes of h->l_text, then h->l_text bytes of h->text.
 */
void bcf_hdr_write(BGZF *fp, const bcf_hdr_t *h)
{
	const void *len_field = &h->l_text;

	bgzf_write(fp, "BCF\2\1", 5);      /* magic */
	bgzf_write(fp, len_field, 4);      /* text length */
	bgzf_write(fp, h->text, h->l_text); /* header text */
}
Example #15
0
/**
 * Writes the NUL-terminated string s followed by a newline to the BGZF file.
 *
 * @param s text of the line, without the trailing newline
 * @return total number of bytes written (text plus newline), or the negative
 *         value reported by bgzf_write() if either write fails
 */
int BGZipFileWriter::writeLine(const char* s) {
    const int textBytes = bgzf_write(this->fp, s, strlen(s));
    // Return the error as is: adding the newline's byte count to a -1 would
    // turn a failed write into an innocent looking 0.
    if (textBytes < 0)
        return textBytes;

    const int newlineBytes = bgzf_write(this->fp, "\n", 1);
    if (newlineBytes < 0)
        return newlineBytes;

    return textBytes + newlineBytes;
}
Example #16
0
/**
 * Writes the NUL-terminated string s to the BGZF file, without adding a newline.
 *
 * @param s text to write
 * @return the value returned by bgzf_write()
 */
int BGZipFileWriter::write(const char* s) {
    const size_t nBytes = strlen(s);
    return bgzf_write(this->fp, s, nBytes);
}
Example #17
0
int fst_index(int argc,char **argv){
  if(argc<1){
    fprintf(stderr,"Must supply afile.saf.idx [chrname, write more info]\n");
    return 0; 
  }
  args *arg = getArgs(argc,argv);
  if(!arg->fstout){
    fprintf(stderr,"\t-> Must supply -fstout for doing fstindex\n");
    return 0;
  }

  std::vector<persaf *> &saf =arg->saf;
  //assert(saf.size()==2);
  size_t nSites = arg->nSites;
  if(nSites == 0){//if no -nSites is specified
    nSites = 100000;//<- set default to 100k sites, no need to load everything...
    // nSites=nsites(saf,arg);
  }
  fprintf(stderr,"\t-> nSites: %lu\n",nSites);
  std::vector<Matrix<float> *> gls;
  for(int i=0;i<saf.size();i++)
    gls.push_back(alloc<float>(nSites,saf[i]->nChr+1));

  //  int ndim= parspace(saf);
  if(arg->sfsfname.size()!=choose(saf.size(),2)){
    fprintf(stderr,"\t-> You have supplied: %lu populations, that is %d pairs\n",saf.size(),choose(saf.size(),2));
    fprintf(stderr,"\t-> You therefore need to supply %d 2dsfs priors instead of:%lu\n",choose(saf.size(),2),arg->sfsfname.size());
    exit(0);
  }
  std::vector<double *> sfs;
  int inc =0;
  for(int i=0;i<saf.size();i++)
    for(int j=i+1;j<saf.size();j++){
      size_t pairdim = (saf[i]->nChr+1)*(saf[j]->nChr+1);
      double *ddd=new double[pairdim];
      readSFS(arg->sfsfname[inc],pairdim,ddd);
      normalize(ddd,pairdim);
      sfs.push_back(ddd);
      inc++;
    }

  
  double **a1,**b1;
  a1=new double*[choose(saf.size(),2)];
  b1=new double*[choose(saf.size(),2)];
  inc=0;
  for(int i=0;i<saf.size();i++)
    for(int j=i+1;j<saf.size();j++){
      calcCoef((int)saf[i]->nChr,(int)saf[j]->nChr,&a1[inc],&b1[inc]);
      //      fprintf(stderr,"a1[%d]:%p b1[%d]:%p\n",inc,&a1[inc][0],inc,&b1[inc][0]);
      inc++;
    }

  BGZF *fstbg = openFileBG(arg->fstout,".fst.gz");
  FILE *fstfp = openFile(arg->fstout,".fst.idx");
  char buf[8]="fstv1";
  bgzf_write(fstbg,buf,8);    
  fwrite(buf,1,8,fstfp);
#if 0
  for(int i=0;i<ndim;i++)
    fprintf(stdout,"%f %f\n",a1[i],b1[i]);
  exit(0);
#endif
#if 1
  size_t nsafs=saf.size();
  fwrite(&nsafs,sizeof(size_t),1,fstfp);
  for(int i=0;i<nsafs;i++){
    size_t clen= strlen(saf[i]->fname);
    fwrite(&clen,sizeof(size_t),1,fstfp);
    fwrite(saf[i]->fname,1,clen,fstfp);
  }
#endif
  int asdf = choose(saf.size(),2);
  std::vector<double> *ares = new std::vector<double> [choose(saf.size(),2)];
  std::vector<double> *bres = new std::vector<double> [choose(saf.size(),2)];
  //  for(int i=0;i<3;i++)
    //    fprintf(stderr,"ares.size():%lu bres.size():%lu sfs:%p\n",ares[i].size(),bres[i].size(),&sfs[i][0]);
  std::vector<int> posi;
  setGloc(saf,nSites);
  int *posiToPrint = new int[nSites];
  for(myMap::iterator it = saf[0]->mm.begin();it!=saf[0]->mm.end();++it) {
    //    fprintf(stderr,"doing chr:%s\n",it->first);
    if(arg->chooseChr!=NULL){
      it = saf[0]->mm.find(arg->chooseChr);
      if(it==saf[0]->mm.end()){
	fprintf(stderr,"Problem finding chr: %s\n",arg->chooseChr);
	break;
      }
    }
    for(int i=0;i<choose(saf.size(),2);i++){
      ares[i].clear();
      bres[i].clear();
    }
    posi.clear();
    while(1) {
      int ret=readdata(saf,gls,nSites,it->first,arg->start,arg->stop,posiToPrint,NULL);//read nsites from data
      //  fprintf(stderr,"ret:%d glsx:%lu\n",ret,gls[0]->x);
      //if(gls[0]->x!=nSites&&arg->chooseChr==NULL&&ret!=-3){
	//fprintf(stderr,"continue continue\n");
      //	continue;
      //}
      
      fprintf(stderr,"\t-> Will now do fst temp dump using a chunk of %lu\n",gls[0]->x);
      int inc=0;
      for(int i=0;i<saf.size();i++)
	for(int j=i+1;j<saf.size();j++){
	  //	  fprintf(stderr,"i:%d j:%d inc:%d gls[i]:%p gls[j]:%p sfs:%p a1:%p b1:%p\n",i,j,inc,gls[i],gls[j],sfs[i],&a1[inc][0],&a1[inc][0]);
	  block_coef(gls[i],gls[j],sfs[inc],a1[inc],b1[inc],ares[inc],bres[inc]);
	  inc++;
	}
      for(int i=0;i<gls[0]->x;i++)
	posi.push_back(posiToPrint[i]);

      for(int i=0;i<gls.size();i++)
	gls[i]->x =0;
      if(ret==-2)//no more data in files or in chr, eith way we break;
	break;
    }
    size_t clen = strlen(it->first);
    fwrite(&clen,sizeof(size_t),1,fstfp);
    fwrite(it->first,1,clen,fstfp);
    size_t nit=posi.size();

    assert(1==fwrite(&nit,sizeof(size_t),1,fstfp));
    int64_t tell = bgzf_tell(fstbg);
    fwrite(&tell,sizeof(int64_t),1,fstfp);
    bgzf_write(fstbg,&posi[0],posi.size()*sizeof(int));
    int inc =0;
    for(int i=0;i<saf.size();i++)
      for(int j=i+1;j<saf.size();j++){
	bgzf_write(fstbg,&(ares[inc][0]),ares[inc].size()*sizeof(double));
	bgzf_write(fstbg,&(bres[inc][0]),bres[inc].size()*sizeof(double));
	inc++;
      }
    if(arg->chooseChr!=NULL)
      break;
  }
  delGloc(saf,nSites);
  destroy(gls,nSites);
  destroy_args(arg);
  for(int i=0;i<sfs.size();i++)
    delete [] sfs[i];
#if 0
  fprintf(stderr,"\n\t-> NB NB output is no longer log probs of the frequency spectrum!\n");
  fprintf(stderr,"\t-> Output is now simply the expected values! \n");
  fprintf(stderr,"\t-> You can convert to the old format simply with log(norm(x))\n");
#endif
  bgzf_close(fstbg);
  fclose(fstfp);
  fprintf(stderr,"\t-> fst index finished with no errors!\n");
  return 0;
}
Example #18
0
/*
 * Concatenate 'nfn' BAM files into 'outbam' ("-" means stdout) without
 * re-compressing their data blocks.
 *
 * The output header is 'h' when given, otherwise the header of the first
 * input. For every input the remainder of the block holding the end of its
 * header is re-compressed, and all following blocks are copied raw. The last
 * BGZF_EMPTY_BLOCK_SIZE raw bytes of each input are held back in 'ebuf' and
 * only written if they do not look like an empty gzip block (a warning is
 * printed in that case).
 *
 * Returns 0 on success, 1 if the output cannot be opened and -1 on any other
 * failure. On failure all handles and buffers opened here are released.
 */
int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam)
{
    BGZF *fp;
    BGZF *in = NULL;
    bam_hdr_t *old = NULL;
    uint8_t *buf = NULL;
    uint8_t ebuf[BGZF_EMPTY_BLOCK_SIZE];
    const int es=BGZF_EMPTY_BLOCK_SIZE;
    int i;

    fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(fileno(stdout), "w");
    if (fp == 0) {
        fprintf(stderr, "[%s] ERROR: fail to open output file '%s'.\n", __func__, outbam);
        return 1;
    }
    if (h) bam_hdr_write(fp, h);

    buf = (uint8_t*) malloc(BUF_SIZE);
    if (buf == NULL) {
        fprintf(stderr, "[%s] ERROR: out of memory.\n", __func__);
        goto fail;
    }
    for(i = 0; i < nfn; ++i){
        int len,j;

        in = strcmp(fn[i], "-")? bgzf_open(fn[i], "r") : bgzf_fdopen(fileno(stdin), "r");
        if (in == 0) {
            fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
            goto fail;
        }
        if (in->is_write) goto fail;

        old = bam_hdr_read(in);
        if (old == NULL) {
            fprintf(stderr, "[%s] ERROR: couldn't read header for '%s'.\n",
                    __func__, fn[i]);
            goto fail;
        }
        if (h == 0 && i == 0) bam_hdr_write(fp, old);

        if (in->block_offset < in->block_length) {
            /* records sharing a block with the end of the header */
            bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
            bgzf_flush(fp);
        }

        /* j becomes 1 once ebuf holds the most recent 'es' raw bytes */
        j=0;
        while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0) {
            if(len<es){
                /* short read: slide the held-back window forward by 'len' bytes */
                int diff=es-len;
                if(j==0) {
                    fprintf(stderr, "[%s] ERROR: truncated file?: '%s'.\n", __func__, fn[i]);
                    goto fail;
                }
                bgzf_raw_write(fp, ebuf, len);
                memcpy(ebuf,ebuf+len,diff);
                memcpy(ebuf+diff,buf,len);
            } else {
                /* emit what was held back, then hold back the tail of this chunk */
                if(j!=0) bgzf_raw_write(fp, ebuf, es);
                len-= es;
                memcpy(ebuf,buf+len,es);
                bgzf_raw_write(fp, buf, len);
            }
            j=1;
        }

        /* check final gzip block */
        if (j == 0) {
            /* nothing was read for this file, so ebuf holds no data from it
               (it is uninitialised for the first file); do not inspect or write it */
            fprintf(stderr, "[%s] WARNING: Unexpected block structure in file '%s'.", __func__, fn[i]);
            fprintf(stderr, " Possible output corruption.\n");
        } else {
            const uint8_t gzip1=ebuf[0];
            const uint8_t gzip2=ebuf[1];
            const uint32_t isize=*((uint32_t*)(ebuf+es-4));
            if(((gzip1!=GZIPID1) || (gzip2!=GZIPID2)) || (isize!=0)) {
                fprintf(stderr, "[%s] WARNING: Unexpected block structure in file '%s'.", __func__, fn[i]);
                fprintf(stderr, " Possible output corruption.\n");
                bgzf_raw_write(fp, ebuf, es);
            }
        }
        bam_hdr_destroy(old);
        old = NULL;
        bgzf_close(in);
        in = NULL;
    }
    free(buf);
    bgzf_close(fp);
    return 0;

 fail:
    /* release whatever is still open for the file being processed */
    if (old) bam_hdr_destroy(old);
    if (in) bgzf_close(in);
    free(buf);
    bgzf_close(fp);
    return -1;
}
Example #19
0
void vcf_file::print_bcf(const parameters &params)
{
	LOG.printLOG("Outputting BCF file...\n");
	BGZF * out;
	if(!params.stream_out)
	{
		string output_file = params.output_prefix + ".recode.bcf";
		out = bgzf_open(output_file.c_str(), "w");
	}
	else
		out = bgzf_dopen(1, "w");

	string header_str;
	uint32_t len_text = 0;
	vector<char> header;

	char magic[5] = {'B','C','F','\2', '\1'};
	bgzf_write(out, magic, 5);

	if (meta_data.has_idx)
	{
		LOG.warning("VCF file contains IDX values in header. These are being removed for conversion to BCF.");
		meta_data.reprint();
		meta_data.reparse();
	}
	for (unsigned int ui=0; ui<meta_data.lines.size(); ui++)
	{
		for (unsigned int uj=0; uj<meta_data.lines[ui].length(); uj++)
			header.push_back( meta_data.lines[ui][uj] );
		header.push_back('\n');
	}

	if (meta_data.has_contigs == false)
	{
		vector<string> contig_vector;
		get_contigs(params.contigs_file, contig_vector);

		for(unsigned int ui=0; ui<contig_vector.size(); ui++)
		{
			meta_data.add_CONTIG_descriptor(contig_vector[ui].substr(10, contig_vector[ui].size()-8),int(ui));
			for(unsigned int uj=0; uj<contig_vector[ui].size(); uj++)
				header.push_back(contig_vector[ui][uj]);
			header.push_back('\n');
		}
	}

	header_str = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO";
	if (meta_data.N_indv > 0)
		header_str += "\tFORMAT";

	for (unsigned int ui=0; ui<meta_data.N_indv; ui++)
		if (include_indv[ui])
		{
			header_str += "\t";
			header_str += meta_data.indv[ui];
		}
	header_str += "\n";

	for (unsigned int ui=0; ui<header_str.length(); ui++)
		header.push_back( header_str[ui] );

	header.push_back( '\0' );
	len_text = header.size();

	bgzf_write(out, (char *)&len_text, sizeof(len_text) );
	bgzf_write(out, (char *)&header[0], len_text );

	vector<char> variant_line;
	entry * e = new vcf_entry(meta_data, include_indv);
	while(!eof())
	{
		get_entry(variant_line);
		e->reset(variant_line);
		N_entries += e->apply_filters(params);
		if(!e->passed_filters)
			continue;
		N_kept_entries++;
		e->parse_basic_entry(true, true, true);
		e->parse_full_entry(true);
		e->parse_genotype_entries(true,true,true,true);
		e->print_bcf(out, params.recode_INFO_to_keep, params.recode_all_INFO);
	}
	delete e;
	bgzf_close(out);
}
Example #20
0
File: bgzip.c Project: Abdul59/STAR
int main(int argc, char **argv)
{
	int c, compress, pstdout, is_forced, index = 0, reindex = 0;
	BGZF *fp;
	void *buffer;
	long start, end, size;
    char *index_fname = NULL;

    static struct option loptions[] = 
    {
        {"help",0,0,'h'},
        {"offset",1,0,'b'},
        {"stdout",0,0,'c'},
        {"decompress",0,0,'d'},
        {"force",0,0,'f'},
        {"index",0,0,'i'},
        {"index-name",1,0,'I'},
        {"reindex",0,0,'r'},
        {"size",1,0,'s'},
        {0,0,0,0}
    };

	compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0;
	while((c  = getopt_long(argc, argv, "cdh?fb:s:iI:r",loptions,NULL)) >= 0){
		switch(c){
		case 'd': compress = 0; break;
		case 'c': pstdout = 1; break;
		case 'b': start = atol(optarg); compress = 0; pstdout = 1; break;
		case 's': size = atol(optarg); pstdout = 1; break;
		case 'f': is_forced = 1; break;
        case 'i': index = 1; break;
        case 'I': index_fname = optarg; break;
        case 'r': reindex = 1; compress = 0; break;
		case 'h': 
        case '?': return bgzip_main_usage();
		}
	}
	if (size >= 0) end = start + size;
	if (end >= 0 && end < start) {
		fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end);
		return 1;
	}
	if (compress == 1) {
		struct stat sbuf;
		int f_src = fileno(stdin);
		int f_dst = fileno(stdout);

		if ( argc>optind )
		{
			if ( stat(argv[optind],&sbuf)<0 ) 
			{ 
				fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
				return 1; 
			}

			if ((f_src = open(argv[optind], O_RDONLY)) < 0) {
				fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
				return 1;
			}

			if (pstdout)
				f_dst = fileno(stdout);
			else
			{
				char *name = malloc(strlen(argv[optind]) + 5);
				strcpy(name, argv[optind]);
				strcat(name, ".gz");
				f_dst = write_open(name, is_forced);
				if (f_dst < 0) return 1;
				free(name);
			}
		}
		else if (!pstdout && isatty(fileno((FILE *)stdout)) )
			return bgzip_main_usage();
        else if ( index && !index_fname )
        {
            fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n");
            return 1;
        }

		fp = bgzf_fdopen(f_dst, "w");
        if ( index ) bgzf_index_build_init(fp);
		buffer = malloc(WINDOW_SIZE);
		while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0)
			if (bgzf_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode);
		// f_dst will be closed here
        if ( index ) 
        {
            if ( index_fname ) bgzf_index_dump(fp, index_fname, NULL);
            else bgzf_index_dump(fp, argv[optind], ".gz.gzi");
        }
		if (bgzf_close(fp) < 0) error("Close failed: Error %d", fp->errcode);
		if (argc > optind && !pstdout) unlink(argv[optind]);
		free(buffer);
		close(f_src);
		return 0;
	}
    else if ( reindex )
    {
        if ( argc>optind )
        {
			fp = bgzf_open(argv[optind], "r");
            if ( !fp ) error("[bgzip] Could not open file: %s\n", argv[optind]);
        }
        else
        {
            if ( !index_fname ) error("[bgzip] Index file name expected when reading from stdin\n");
            fp = bgzf_fdopen(fileno(stdin), "r");
        	if ( !fp ) error("[bgzip] Could not read from stdin: %s\n", strerror(errno));
        }

        buffer = malloc(BGZF_BLOCK_SIZE);
        bgzf_index_build_init(fp);
        int ret;
        while ( (ret=bgzf_read(fp, buffer, BGZF_BLOCK_SIZE))>0 ) ;
        free(buffer);
        if ( ret<0 ) error("Is the file gzipped or bgzipped? The latter is required for indexing.\n");
 
        if ( index_fname )
            bgzf_index_dump(fp, index_fname, NULL);
        else 
            bgzf_index_dump(fp, argv[optind], ".gzi");

        if ( bgzf_close(fp)<0 ) error("Close failed: Error %d\n",fp->errcode);
        return 0;
    }
    else
    {
		struct stat sbuf;
		int f_dst;

		if ( argc>optind )
		{
			if ( stat(argv[optind],&sbuf)<0 )
			{
				fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
				return 1;
			}
			char *name;
			int len = strlen(argv[optind]);
			if ( strcmp(argv[optind]+len-3,".gz") )
			{
				fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]);
				return 1;
			}
			fp = bgzf_open(argv[optind], "r");
			if (fp == NULL) {
				fprintf(stderr, "[bgzip] Could not open file: %s\n", argv[optind]);
				return 1;
			}

			if (pstdout) {
				f_dst = fileno(stdout);
			}
			else {
				name = strdup(argv[optind]);
				name[strlen(name) - 3] = '\0';
				f_dst = write_open(name, is_forced);
				free(name);
			}
		}
		else if (!pstdout && isatty(fileno((FILE *)stdin)) )
			return bgzip_main_usage();
		else
		{
			f_dst = fileno(stdout);
			fp = bgzf_fdopen(fileno(stdin), "r");
			if (fp == NULL) {
				fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno));
				return 1;
			}
		}
        buffer = malloc(WINDOW_SIZE);
        if ( start>0 )
        {
            if ( bgzf_index_load(fp, argv[optind], ".gzi") < 0 ) error("Could not load index: %s.gzi\n", argv[optind]);
            if ( bgzf_useek(fp, start, SEEK_SET) < 0 ) error("Could not seek to %d-th (uncompressd) byte\n", start);
        }
        while (1) {
            if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE);
            else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
            if (c == 0) break;
            if (c < 0) error("Could not read %d bytes: Error %d\n", (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start), fp->errcode);
            start += c;
            if ( write(f_dst, buffer, c) != c ) error("Could not write %d bytes\n", c);
            if (end >= 0 && start >= end) break;
        }
        free(buffer);
        if (bgzf_close(fp) < 0) error("Close failed: Error %d\n",fp->errcode);
        if (!pstdout) unlink(argv[optind]);
        return 0;
	}
    return 0;
}
Example #21
0
File: main.c Project: Brainiarc7/TS
/*
 * Replace the header of a BGZF-compressed text file and write the result to stdout.
 *
 * header: path of a plain-text file holding the new header
 * file:   path of the BGZF input file
 * meta:   character that marks header lines in the input
 *
 * The old header lines are skipped, the new header is written as fresh BGZF
 * data, the remainder of the block that contained the end of the old header
 * is re-compressed, and all following compressed data is copied verbatim.
 *
 * Returns 0 on success and -1 when the input cannot be opened or its first
 * block cannot be read; other failures go through error().
 */
int reheader_file(const char *header, const char *file, int meta)
{
    BGZF *fp = bgzf_open(file,"r");
    if ( !fp )
        return -1;
    if (bgzf_read_block(fp) != 0 || !fp->block_length)
    {
        bgzf_close(fp);
        return -1;
    }

    /* buffer aliases the reader's uncompressed block; it is re-used unchanged
       after every bgzf_read_block() below -- presumably the library refills
       the same storage (TODO confirm) */
    char *buffer = fp->uncompressed_block;
    int skip_until = 0;

    if ( buffer[0]==meta )
    {
        skip_until = 1;

        // Skip the header
        while (1)
        {
            if ( buffer[skip_until]=='\n' )
            {
                skip_until++;
                if ( skip_until>=fp->block_length )
                {
                    if (bgzf_read_block(fp) != 0 || !fp->block_length)
                        error("no body?\n");
                    skip_until = 0;
                }
                // The header has finished
                if ( buffer[skip_until]!=meta ) break;
            }
            skip_until++;
            if ( skip_until>=fp->block_length )
            {
                if (bgzf_read_block(fp) != 0 || !fp->block_length)
                    error("no body?\n");
                skip_until = 0;
            }
        }
    }

    FILE *fh = fopen(header,"r");
    if ( !fh )
        error("%s: %s", header,strerror(errno));
    int page_size = getpagesize();
    char *buf = valloc(page_size);
    BGZF *bgzf_out = bgzf_fdopen(fileno(stdout), "w");
    ssize_t nread;
    /* one byte is kept free so a missing final newline can be appended */
    while ( (nread=fread(buf,1,page_size-1,fh))>0 )
    {
        if ( nread<page_size-1 && buf[nread-1]!='\n' )
            buf[nread++] = '\n';
        if (bgzf_write(bgzf_out, buf, nread) < 0) error("Error: %s\n",bgzf_out->error);
    }
    fclose(fh);

    /* data that shared a block with the end of the old header */
    if ( fp->block_length - skip_until > 0 )
    {
        /* the write happens on bgzf_out, so that is the handle whose error is reported */
        if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0)
            error("Error: %s\n",bgzf_out->error);
    }
    if (bgzf_flush(bgzf_out) < 0)
        error("Error: %s\n",bgzf_out->error);

    /* copy the remaining compressed blocks byte for byte */
    while (1)
    {
#ifdef _USE_KNETFILE
        nread = knet_read(fp->x.fpr, buf, page_size);
#else
        nread = fread(buf, 1, page_size, fp->file);
#endif
        if ( nread<=0 )
            break;

#ifdef _USE_KNETFILE
        int count = fwrite(buf, 1, nread, bgzf_out->x.fpw);
#else
        int count = fwrite(buf, 1, nread, bgzf_out->file);
#endif
        if (count != nread)
            error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread);
    }

    if (bgzf_close(bgzf_out) < 0)
        error("Error: %s\n",bgzf_out->error);

    /* release the input handle and the copy buffer */
    bgzf_close(fp);
    free(buf);

    return 0;
}
Example #22
0
// Writes the first binBytes1 bytes of bamArray to the BAM stream and resets
// the buffer fill level. The write is wrapped in mutexOutSAM whenever
// g_threadChunks.threadBool is set.
void BAMoutput::unsortedFlush ()
{
    if (g_threadChunks.threadBool) {
        pthread_mutex_lock(&g_threadChunks.mutexOutSAM);
    }

    bgzf_write(bgzfBAM, bamArray, binBytes1);

    if (g_threadChunks.threadBool) {
        pthread_mutex_unlock(&g_threadChunks.mutexOutSAM);
    }

    // start filling the buffer from the beginning again
    binBytes1 = 0;
}
Example #23
0
int main_bam2fq(int argc, char *argv[])
{
    BGZF *fp, *fpse = 0;
    bam1_t *b;
    uint8_t *buf;
    int max_buf, c, has12 = 0;
    kstring_t str;
    int64_t n_singletons = 0, n_reads = 0;
    char last[512], *fnse = 0;

    while ((c = getopt(argc, argv, "as:")) > 0)
        if (c == 'a') has12 = 1;
        else if (c == 's') fnse = optarg;
    if (argc == optind) {
        fprintf(stderr, "\nUsage:   bam2fq [-a] [-s outSE] <in.bam>\n\n");
        fprintf(stderr, "Options: -a        append /1 and /2 to the read name\n");
        fprintf(stderr, "         -s FILE   write singleton reads to FILE [assume single-end]\n");
        fprintf(stderr, "\n");
        return 1;
    }
    fp = strcmp(argv[optind], "-")? bgzf_open(argv[optind], "r") : bgzf_dopen(fileno(stdin), "r");
    assert(fp);
    bam_hdr_destroy(bam_hdr_read(fp));
    buf = 0;
    max_buf = 0;
    str.l = str.m = 0;
    str.s = 0;
    last[0] = 0;
    if (fnse) fpse = bgzf_open(fnse, "w1");

    b = bam_init1();
    while (bam_read1(fp, b) >= 0) {
        int i, qlen = b->core.l_qseq, is_print = 0;
        uint8_t *qual, *seq;
        if (b->flag&BAM_FSECONDARY) continue; // skip secondary alignments
        ++n_reads;
        if (fpse) {
            if (str.l && strcmp(last, bam_get_qname(b))) {
                bgzf_write(fpse, str.s, str.l);
                str.l = 0;
                ++n_singletons;
            }
            if (str.l) is_print = 1;
            strcpy(last, bam_get_qname(b));
        } else is_print = 1;
        qual = bam_get_qual(b);
        kputc(qual[0] == 0xff? '>' : '@', &str);
        kputsn(bam_get_qname(b), b->core.l_qname - 1, &str);
        if (has12) {
            kputc('/', &str);
            kputw(b->core.flag>>6&3, &str);
        }
        kputc('\n', &str);
        if (max_buf < qlen + 1) {
            max_buf = qlen + 1;
            kroundup32(max_buf);
            buf = (uint8_t*)realloc(buf, max_buf);
        }
        buf[qlen] = 0;
        seq = bam_get_seq(b);
        for (i = 0; i < qlen; ++i) buf[i] = bam_seqi(seq, i); // copy the sequence
        if (bam_is_rev(b)) { // reverse complement
            for (i = 0; i < qlen>>1; ++i) {
                int8_t t = seq_comp_table[buf[qlen - 1 - i]];
                buf[qlen - 1 - i] = seq_comp_table[buf[i]];
                buf[i] = t;
            }
            if (qlen&1) buf[i] = seq_comp_table[buf[i]];
        }
        for (i = 0; i < qlen; ++i) buf[i] = seq_nt16_str[buf[i]];
        kputsn((char*)buf, qlen, &str);
        kputc('\n', &str);
        if (qual[0] != 0xff) {
            kputsn("+\n", 2, &str);
            for (i = 0; i < qlen; ++i) buf[i] = 33 + qual[i];
            if (bam_is_rev(b)) { // reverse
                for (i = 0; i < qlen>>1; ++i) {
                    uint8_t t = buf[qlen - 1 - i];
                    buf[qlen - 1 - i] = buf[i];
                    buf[i] = t;
                }
            }
        }
        kputsn((char*)buf, qlen, &str);
        kputc('\n', &str);
        if (is_print) {
            fwrite(str.s, 1, str.l, stdout);
            str.l = 0;
        }
    }
    if (fpse) {
        if (str.l) {
            bgzf_write(fpse, str.s, str.l);
            ++n_singletons;
        }
        fprintf(stderr, "[M::%s] discarded %lld singletons\n", __func__, (long long)n_singletons);
        bgzf_close(fpse);
    }
    fprintf(stderr, "[M::%s] processed %lld reads\n", __func__, (long long)n_reads);
    free(buf);
    free(str.s);
    bam_destroy1(b);
    bgzf_close(fp);
    return 0;
}
Example #24
0
/*
 * bgzip command-line entry point (variant without index support).
 *
 * Compresses the input into BGZF by default, or decompresses with -d.
 * -c writes to stdout, -f is passed to write_open() as the force flag,
 * -b OFFSET and -s SIZE select the region to decompress.
 * Input is the first non-option argument or stdin.
 *
 * Returns 0 on success, 1 on errors detected here, and the result of
 * bgzip_main_usage() for -h or unusable tty combinations; BGZF failures
 * are handed to fail().
 */
int main(int argc, char **argv)
{
    int c, compress, pstdout, is_forced;
    BGZF *fp;
    void *buffer;
    long start, end, size;

    compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0;
    while ((c = getopt(argc, argv, "cdhfb:s:")) >= 0) {
        switch (c) {
        case 'h': return bgzip_main_usage();
        case 'd': compress = 0; break;
        case 'c': pstdout = 1; break;
        case 'b': start = atol(optarg); break;
        case 's': size = atol(optarg); break;
        case 'f': is_forced = 1; break;
        }
    }
    if (size >= 0) end = start + size;
    if (end >= 0 && end < start) {
        fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end);
        return 1;
    }
    if (compress == 1) {
        struct stat sbuf;
        int f_src = fileno(stdin);
        int f_dst = fileno(stdout);

        if ( argc>optind )
        {
            if ( stat(argv[optind],&sbuf)<0 )
            {
                fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
                return 1;
            }

            if ((f_src = open(argv[optind], O_RDONLY)) < 0) {
                fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
                return 1;
            }

            if (pstdout)
                f_dst = fileno(stdout);
            else
            {
                char *name = malloc(strlen(argv[optind]) + 5);
                strcpy(name, argv[optind]);
                strcat(name, ".gz");
                f_dst = write_open(name, is_forced);
                /* release the name before bailing out so it is not leaked */
                free(name);
                if (f_dst < 0) return 1;
            }
        }
        else if (!pstdout && isatty(fileno((FILE *)stdout)) )
            return bgzip_main_usage();

        fp = bgzf_fdopen(f_dst, "w");
        buffer = malloc(WINDOW_SIZE);
        while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0)
            if (bgzf_write(fp, buffer, c) < 0) fail(fp);
        // f_dst will be closed here
        if (bgzf_close(fp) < 0) fail(fp);
        /* the original file is removed only when a "<name>.gz" file was written */
        if (argc > optind && !pstdout) unlink(argv[optind]);
        free(buffer);
        close(f_src);
        return 0;
    } else {
        struct stat sbuf;
        int f_dst;

        if ( argc>optind )
        {
            if ( stat(argv[optind],&sbuf)<0 )
            {
                fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
                return 1;
            }
            int len = strlen(argv[optind]);
            /* names shorter than the suffix cannot end in ".gz"; checking the
               length first avoids reading before the start of the string */
            if ( len < 3 || strcmp(argv[optind]+len-3,".gz") )
            {
                fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]);
                return 1;
            }
            fp = bgzf_open(argv[optind], "r");
            if (fp == NULL) {
                fprintf(stderr, "[bgzip] Could not open file: %s\n", argv[optind]);
                return 1;
            }

            if (pstdout) {
                f_dst = fileno(stdout);
            }
            else {
                char *name = strdup(argv[optind]);
                name[strlen(name) - 3] = '\0';
                f_dst = write_open(name, is_forced);
                free(name);
                /* same failure handling as in the compress branch */
                if (f_dst < 0) return 1;
            }
        }
        else if (!pstdout && isatty(fileno((FILE *)stdin)) )
            return bgzip_main_usage();
        else
        {
            f_dst = fileno(stdout);
            fp = bgzf_fdopen(fileno(stdin), "r");
            if (fp == NULL) {
                fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno));
                return 1;
            }
        }
        buffer = malloc(WINDOW_SIZE);
        if (bgzf_seek(fp, start, SEEK_SET) < 0) fail(fp);
        while (1) {
            if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE);
            else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
            if (c == 0) break;
            if (c < 0) fail(fp);
            start += c;
            /* a short or failed write would otherwise truncate the output unnoticed */
            if (write(f_dst, buffer, c) != c) {
                fprintf(stderr, "[bgzip] Could not write %d bytes\n", c);
                return 1;
            }
            if (end >= 0 && start >= end) break;
        }
        free(buffer);
        if (bgzf_close(fp) < 0) fail(fp);
        /* only remove the input when it was a named file that got unpacked next to it */
        if (argc > optind && !pstdout) unlink(argv[optind]);
        return 0;
    }
}
Example #25
0
/*
 * Concatenate BAM files without re-compressing their alignment blocks.
 *
 * nfn/fn:  number and names of the input files ("-" means stdin)
 * h:       header to write to the output, or NULL to copy the header of
 *          the first input
 * outbam:  output file name ("-" means stdout)
 *
 * For every input the header is read and dropped, any decompressed data left
 * in the current block is re-written, and the remaining raw bytes are copied.
 * The last `es` bytes of each input are held back in `ebuf` so that the
 * trailing empty BGZF block can be omitted from the output.
 *
 * Returns 0 on success, 1 if the output cannot be opened, -1 on input errors.
 */
int bam_cat(int nfn, char * const *fn, const bam_header_t *h, const char* outbam)
{
    BGZF *fp;
    FILE* fp_file;
    uint8_t *buf;
    uint8_t ebuf[BGZF_EMPTY_BLOCK_SIZE];
    const int es=BGZF_EMPTY_BLOCK_SIZE;
    int i;

    fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(_fileno(stdout), "w");
    if (fp == 0) {
        fprintf(stderr, "[%s] ERROR: fail to open output file '%s'.\n", __FUNCTION__, outbam);
        return 1;
    }
    if (h) bam_header_write(fp, h);

    buf = (uint8_t*) malloc(BUF_SIZE);
    for(i = 0; i < nfn; ++i){
        BGZF *in;
        bam_header_t *old;
        int len,j;

        in = strcmp(fn[i], "-")? bam_open(fn[i], "r") : bam_dopen(_fileno(stdin), "r");
        if (in == 0) {
            fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __FUNCTION__, fn[i]);
            free(buf);
            bgzf_close(fp);
            return -1;
        }
        if (in->open_mode != 'r') {
            bgzf_close(in);
            free(buf);
            bgzf_close(fp);
            return -1;
        }

        old = bam_header_read(in);
        if (h == 0 && i == 0) bam_header_write(fp, old);

        /* decompressed data that followed the header inside the same block */
        if (in->block_offset < in->block_length) {
            bgzf_write(fp, (uint8_t*)in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
            bgzf_flush(fp);
        }

        j=0; /* becomes 1 once ebuf holds the tail of a previous chunk */
#ifdef _USE_KNETFILE
        fp_file=fp->x.fpw;
        while ((len = knet_read(in->x.fpr, buf, BUF_SIZE)) > 0) {
#else
        fp_file=fp->file;
        while (!feof(in->file) && (len = fread(buf, 1, BUF_SIZE, in->file)) > 0) {
#endif
            if(len<es){
                int diff=es-len;
                if(j==0) {
                    fprintf(stderr, "[%s] ERROR: truncated file?: '%s'.\n", __FUNCTION__, fn[i]);
                    bam_header_destroy(old);
                    bgzf_close(in);
                    free(buf);
                    bgzf_close(fp);
                    return -1;
                }
                /* emit the oldest len bytes of the held-back tail, then slide
                   the rest to the front; source and destination overlap, so
                   memmove is required here */
                fwrite(ebuf, 1, len, fp_file);
                memmove(ebuf,ebuf+len,diff);
                memcpy(ebuf+diff,buf,len);
            } else {
                if(j!=0) fwrite(ebuf, 1, es, fp_file);
                len-= es;
                memcpy(ebuf,buf+len,es);
                fwrite(buf, 1, len, fp_file);
            }
            j=1;
        }

        /* check final gzip block */
        {
            const uint8_t gzip1=ebuf[0];
            const uint8_t gzip2=ebuf[1];
            const uint32_t isize=*((uint32_t*)(ebuf+es-4));
            if(((gzip1!=GZIPID1) || (gzip2!=GZIPID2)) || (isize!=0)) {
                fprintf(stderr, "[%s] WARNING: Unexpected block structure in file '%s'.", __FUNCTION__, fn[i]);
                fprintf(stderr, " Possible output corruption.\n");
                /* not an empty block after all: keep the bytes */
                fwrite(ebuf, 1, es, fp_file);
            }
        }
        bam_header_destroy(old);
        bgzf_close(in);
    }
    free(buf);
    bgzf_close(fp);
    return 0;
}



/*
 * Command-line front end for bam_cat().
 *
 * Usage: samtools cat [-h header.sam] [-o out.bam] <in1.bam> <in2.bam> [...]
 *   -h FILE  take the output header from the SAM file FILE
 *   -o FILE  write to FILE instead of stdout
 *
 * Returns 1 on usage errors or when the header file cannot be opened,
 * otherwise the result of bam_cat().
 */
int main_cat(int argc, char *argv[])
{
    bam_header_t *h = 0;
    char *outfn = 0;
    int c, ret;
    while ((c = getopt(argc, argv, "h:o:")) >= 0) {
        switch (c) {
            case 'h': {
                tamFile fph = sam_open(optarg);
                if (fph == 0) {
                    /* report the file that was actually passed to -h */
                    fprintf(stderr, "[%s] ERROR: fail to read the header from '%s'.\n", __FUNCTION__, optarg);
                    free(outfn);
                    if (h) bam_header_destroy(h);
                    return 1;
                }
                /* a repeated -h replaces the earlier header */
                if (h) bam_header_destroy(h);
                h = sam_header_read(fph);
                sam_close(fph);
                break;
            }
            case 'o':
                /* a repeated -o replaces the earlier name */
                free(outfn);
                outfn = strdup(optarg);
                break;
        }
    }
    if (argc - optind < 2) {
        fprintf(stderr, "Usage: samtools cat [-h header.sam] [-o out.bam] <in1.bam> <in2.bam> [...]\n");
        free(outfn);
        if (h) bam_header_destroy(h);
        return 1;
    }
    ret = bam_cat(argc - optind, argv + optind, h, outfn? outfn : "-");
    free(outfn);
    if (h) bam_header_destroy(h);
    return ret;
}
Example #26
0
// Re-encodes the entries of this BCF file that pass the filters in `params`
// into a new BCF stream: "<output_prefix>.recode.bcf", or file descriptor 1
// when params.stream_out is set. The stream starts with the BCF magic,
// followed by the length-prefixed, NUL-terminated header text (meta lines
// plus the column line listing the included individuals), then the records.
// N_entries and N_kept_entries are updated while iterating.
void bcf_file::print_bcf(const parameters &params)
{
	LOG.printLOG("Outputting BCF file...\n");

	BGZF * out;
	if (params.stream_out)
		out = bgzf_dopen(1, "w");
	else
	{
		string output_file = params.output_prefix + ".recode.bcf";
		out = bgzf_open(output_file.c_str(), "w");
	}

	char magic[5] = {'B','C','F','\2','\2'};
	bgzf_write(out, magic, 5);

	// Meta-information lines, one per line of header text.
	vector<char> header;
	for (unsigned int line_idx=0; line_idx<meta_data.lines.size(); line_idx++)
	{
		for (unsigned int pos=0; pos<meta_data.lines[line_idx].length(); pos++)
			header.push_back( meta_data.lines[line_idx][pos] );
		header.push_back('\n');
	}

	// Column header line; FORMAT and sample columns only when individuals exist.
	string header_str = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO";
	if (meta_data.N_indv > 0)
		header_str += "\tFORMAT";

	for (unsigned int indv_idx=0; indv_idx<meta_data.N_indv; indv_idx++)
	{
		if (!include_indv[indv_idx])
			continue;
		header_str += "\t";
		header_str += meta_data.indv[indv_idx];
	}
	header_str += "\n";

	header.insert(header.end(), header_str.begin(), header_str.end());
	header.push_back( '\0' );

	// The header text is preceded by its length, terminator included.
	uint32_t len_text = header.size();
	bgzf_write(out, (char *)&len_text, sizeof(len_text) );
	bgzf_write(out, (char *)&header[0], len_text );

	vector<char> variant_line;
	entry * e = new bcf_entry(meta_data, include_indv);
	while(!eof())
	{
		get_entry(variant_line);
		e->reset(variant_line);
		N_entries += e->apply_filters(params);
		if(e->passed_filters)
		{
			N_kept_entries++;
			e->parse_basic_entry(true, true, true);
			e->parse_full_entry(true);
			e->parse_genotype_entries(true);
			e->print_bcf(out, params.recode_INFO_to_keep, params.recode_all_INFO);
		}
	}
	delete e;
	bgzf_close(out);
}
Example #27
0
// Imports every sequence of `input_filename` (read through zlib, so plain or
// gzipped input) into the database and writes the sequences as BGZF-compressed
// FASTA to `out_fasta_filename`.
//
// For each record the last space-separated field of the comment is stored as
// the signal path when it ends in ".fast5"; otherwise an empty path is stored.
// Records whose name is already present in m_data are skipped with a warning.
// Any failure to open a file, write a record or parse the input terminates
// the process with EXIT_FAILURE.
void ReadDB::import_reads(const std::string& input_filename, const std::string& out_fasta_filename)
{
    // Open readers
    FILE* read_fp = fopen(input_filename.c_str(), "r");
    if(read_fp == NULL) {
        fprintf(stderr, "error: could not open %s for read\n", input_filename.c_str());
        exit(EXIT_FAILURE);
    }

    gzFile gz_read_fp = gzdopen(fileno(read_fp), "r");
    if(gz_read_fp == NULL) {
        fprintf(stderr, "error: could not open %s using gzdopen\n", input_filename.c_str());
        exit(EXIT_FAILURE);
    }

    // Open writers
    FILE* write_fp = fopen(out_fasta_filename.c_str(), "w");
    if(write_fp == NULL) {
        fprintf(stderr, "error: could not open %s for write\n", out_fasta_filename.c_str());
        exit(EXIT_FAILURE);
    }

    BGZF* bgzf_write_fp = bgzf_dopen(fileno(write_fp), "w");
    if(bgzf_write_fp == NULL) {
        fprintf(stderr, "error: could not open %s for bgzipped write\n", out_fasta_filename.c_str());
        exit(EXIT_FAILURE);
    }

    // read input sequences, add to DB and convert to fasta
    int ret = 0;
    kseq_t* seq = kseq_init(gz_read_fp);
    while((ret = kseq_read(seq)) >= 0) {

        // Check for a path to the fast5 file in the comment of the read
        std::string path = "";
        if(seq->comment.l > 0) {

            // This splitting code implicitly handles both the 2 and 3 field
            // fasta format that poretools will output. The FAST5 path
            // is always the last field.
            std::vector<std::string> fields = split(seq->comment.s, ' ');

            // back() on an empty vector is undefined, so only take the last
            // field when the split produced one
            if(!fields.empty()) {
                path = fields.back();
            }

            // as a sanity check we require the path name to end in ".fast5"
            if(path.length() < 6 || path.substr(path.length() - 6) != ".fast5") {
                path = "";
            }
        }

        // sanity check that the read does not exist in the database
        // JTS 04/2019: changed error to warning to account for duplicate reads coming out of
        // some versions of guppy.
        auto iter = m_data.find(seq->name.s);
        if(iter != m_data.end()) {
            fprintf(stderr, "Warning: duplicate read name %s found in fasta file\n", seq->name.s);
            continue;
        }

        // add path
        add_signal_path(seq->name.s, path);

        // write sequence in gzipped fasta for fai indexing later
        std::string out_record;
        out_record += ">";
        out_record += seq->name.s;
        out_record += "\n";
        out_record += seq->seq.s;
        out_record += "\n";
        // a negative return value wraps to a huge size_t and is caught by the comparison
        size_t write_length = bgzf_write(bgzf_write_fp, out_record.c_str(), out_record.length());
        if(write_length != out_record.length()) {
            fprintf(stderr, "error in bgzf_write, aborting\n");
            exit(EXIT_FAILURE);
        }
    }

    // check for abnormal exit conditions
    if(ret <= -2) {
        fprintf(stderr, "kseq_read returned %d indicating an error with the input file %s\n", ret, input_filename.c_str());
        exit(EXIT_FAILURE);
    }

    // cleanup
    kseq_destroy(seq);

    // NOTE(review): gzclose() and bgzf_close() may already close the
    // descriptors obtained through fileno(); confirm that the fclose() calls
    // below do not close the same descriptor a second time.
    gzclose(gz_read_fp);
    fclose(read_fp);

    bgzf_close(bgzf_write_fp);
    fclose(write_fp);
}
Example #28
0
/*
 * Replace the header and/or sample names of a BGZF-compressed VCF
 * (args->fname) and write the result to stdout.
 *
 * The existing header is collected from the leading '#' lines. It is replaced
 * by the contents of args->header_fname when that is set, and its sample
 * names are replaced by those listed in args->samples_fname when that is set.
 * Data sharing a block with the end of the old header is re-compressed; all
 * later blocks are copied verbatim. Failures are reported through error().
 */
static void reheader_vcf_gz(args_t *args)
{
    BGZF *fp = bgzf_open(args->fname,"r");
    if ( !fp || bgzf_read_block(fp) != 0 || !fp->block_length )
        error("Failed to read %s: %s\n", args->fname, strerror(errno));

    kstring_t hdr = {0,0,0};
    char *buffer = (char*) fp->uncompressed_block;

    // Read the header and find the position of the data block
    if ( buffer[0]!='#' ) error("Could not parse the header, expected '#', found '%c'\n", buffer[0]);

    int skip_until = 1;     // end of the header in the current uncompressed block
    while (1)
    {
        if ( buffer[skip_until]=='\n' )
        {
            skip_until++;
            if ( skip_until>=fp->block_length )
            {
                // the block ends exactly at a line end: save it and fetch the next one
                kputsn(buffer,skip_until,&hdr);
                if ( bgzf_read_block(fp) != 0 || !fp->block_length ) error("FIXME: No body in the file: %s\n", args->fname);
                skip_until = 0;
            }
            // The header has finished
            if ( buffer[skip_until]!='#' )
            {
                kputsn(buffer,skip_until,&hdr);
                break;
            }
        }
        skip_until++;
        if ( skip_until>=fp->block_length )
        {
            // a header line continues into the next block
            kputsn(buffer,fp->block_length,&hdr);
            if (bgzf_read_block(fp) != 0 || !fp->block_length) error("FIXME: No body in the file: %s\n", args->fname);
            skip_until = 0;
        }
    }

    int nsamples = 0;
    char **samples = NULL;
    if ( args->samples_fname )
        samples = hts_readlines(args->samples_fname, &nsamples);
    if ( args->header_fname )
    {
        // discard the header collected above and load the replacement
        free(hdr.s); hdr.s = NULL; hdr.l = hdr.m = 0;
        read_header_file(args->header_fname, &hdr);
    }
    if ( samples )
    {
        set_samples(samples, nsamples, &hdr);
        int i;
        for (i=0; i<nsamples; i++) free(samples[i]);
        free(samples);
    }

    // Output the modified header
    BGZF *bgzf_out = bgzf_dopen(fileno(stdout), "w");
    if ( !bgzf_out ) error("Failed to open stdout for writing: %s\n", strerror(errno));
    if ( bgzf_write(bgzf_out, hdr.s, hdr.l)<0 ) error("Error: %d\n",bgzf_out->errcode);
    free(hdr.s);

    // Output all remaining data read with the header block
    if ( fp->block_length - skip_until > 0 )
    {
        // the write happens on bgzf_out, so that is the handle whose error code is reported
        if ( bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until)<0 ) error("Error: %d\n",bgzf_out->errcode);
    }
    if ( bgzf_flush(bgzf_out)<0 ) error("Error: %d\n",bgzf_out->errcode);

    // Stream the rest of the file as it is, without decompressing
    ssize_t nread;
    int page_size = getpagesize();
    char *buf = (char*) valloc(page_size);
    while (1)
    {
        nread = bgzf_raw_read(fp, buf, page_size);
        if ( nread<=0 ) break;

        int count = bgzf_raw_write(bgzf_out, buf, nread);
        if (count != nread) error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread);
    }
    if (bgzf_close(bgzf_out) < 0) error("Error: %d\n",bgzf_out->errcode);
    if (bgzf_close(fp) < 0) error("Error: %d\n",fp->errcode);
    free(buf);
}
Example #29
0
/*
 * Concatenate BGZF-compressed BCF files (args->fnames) into
 * args->output_fname without re-compressing their records.
 *
 * Only the header of the first file is written. For each input, data that
 * shares a block with the header is re-compressed and the remaining blocks
 * are copied as raw bytes. The last `neof` bytes of every input are held back
 * in `cached` and dropped when they equal the BGZF end-of-file block.
 * Failures are reported through error().
 */
static void naive_concat(args_t *args)
{
    // only compressed BCF atm
    BGZF *bgzf_out = bgzf_open(args->output_fname,"w");
    if ( !bgzf_out ) error("Failed to open: %s\n", args->output_fname);

    const size_t page_size = 32768;
    char *buf = (char*) malloc(page_size);
    kstring_t tmp = {0,0,0};
    int i;
    for (i=0; i<args->nfnames; i++)
    {
        htsFile *hts_fp = hts_open(args->fnames[i],"r");
        if ( !hts_fp ) error("Failed to open: %s\n", args->fnames[i]);
        htsFormat type = *hts_get_format(hts_fp);

        if ( type.format==vcf ) error("The --naive option currently works only for compressed BCFs, sorry :-/\n");
        if ( type.compression!=bgzf ) error("The --naive option currently works only for compressed BCFs, sorry :-/\n");

        BGZF *fp = hts_get_bgzfp(hts_fp);
        if ( !fp || bgzf_read_block(fp) != 0 || !fp->block_length )
            error("Failed to read %s: %s\n", args->fnames[i], strerror(errno));

        uint8_t magic[5];
        if ( bgzf_read(fp, magic, 5) != 5 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
        if (strncmp((char*)magic, "BCF\2\2", 5) != 0) error("Invalid BCF magic string in %s\n", args->fnames[i]);

        // the header length is a 4-byte field read into the low bytes of tmp.l
        // NOTE(review): this looks like it relies on a little-endian host -- confirm
        if ( bgzf_read(fp, &tmp.l, 4) != 4 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
        hts_expand(char,tmp.l,tmp.m,tmp.s);
        if ( bgzf_read(fp, tmp.s, tmp.l) != tmp.l ) error("Failed to read the BCF header in %s\n", args->fnames[i]);

        // write only the first header
        if ( i==0 )
        {
            if ( bgzf_write(bgzf_out, "BCF\2\2", 5) !=5 ) error("Failed to write %d bytes to %s\n", 5,args->output_fname);
            if ( bgzf_write(bgzf_out, &tmp.l, 4) !=4 ) error("Failed to write %d bytes to %s\n", 4,args->output_fname);
            if ( bgzf_write(bgzf_out, tmp.s, tmp.l) != tmp.l) error("Failed to write %d bytes to %s\n", (int)tmp.l,args->output_fname);
        }

        // Output all non-header data that were read together with the header block
        int nskip = fp->block_offset;
        if ( fp->block_length - nskip > 0 )
        {
            // the write happens on bgzf_out, so that is the handle whose error code is reported
            if ( bgzf_write(bgzf_out, fp->uncompressed_block+nskip, fp->block_length-nskip)<0 ) error("Error: %d\n",bgzf_out->errcode);
        }
        if ( bgzf_flush(bgzf_out)<0 ) error("Error: %d\n",bgzf_out->errcode);


        // Stream the rest of the file as it is, without recompressing, but remove BGZF EOF blocks
        ssize_t nread, ncached = 0, nwr;
        const int neof = 28;
        char cached[neof];
        while (1)
        {
            nread = bgzf_raw_read(fp, buf, page_size);

            // page_size boundary may occur in the middle of the EOF block, so we need to cache the blocks' ends
            if ( nread<=0 ) break;
            if ( nread<=neof )      // last block
            {
                if ( ncached )
                {
                    // flush the part of the cache that won't be needed
                    nwr = bgzf_raw_write(bgzf_out, cached, nread);
                    if (nwr != nread) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)nread);

                    // make space in the cache so that we can append to the end
                    if ( nread!=neof ) memmove(cached,cached+nread,neof-nread);
                }

                // fill the cache and check for eof outside this loop
                memcpy(cached+neof-nread,buf,nread);
                break;
            }

            // not the last block, flush the cache if full
            if ( ncached )
            {
                nwr = bgzf_raw_write(bgzf_out, cached, ncached);
                if (nwr != ncached) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)ncached);
                ncached = 0;
            }

            // fill the cache
            nread -= neof;
            memcpy(cached,buf+nread,neof);
            ncached = neof;

            nwr = bgzf_raw_write(bgzf_out, buf, nread);
            if (nwr != nread) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)nread);
        }
        // keep the held-back bytes unless they are exactly the BGZF EOF marker
        if ( ncached && memcmp(cached,"\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0",neof) )
        {
            nwr = bgzf_raw_write(bgzf_out, cached, neof);
            if (nwr != neof) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)neof);
        }
        if (hts_close(hts_fp)) error("Close failed: %s\n",args->fnames[i]);
    }
    free(buf);
    free(tmp.s);
    if (bgzf_close(bgzf_out) < 0) error("Error: %d\n",bgzf_out->errcode);
}
Example #30
0
File: tabix.c Project: Illumina/akt
/*
 * Replace the leading header lines of a BGZF text file with the contents of
 * another file and write the result to the BGZF stream opened on "-".
 *
 * fname  - path of the BGZF input file
 * header - path of the plain-text file holding the new header
 * ftype  - file type flags; only text input (IS_TXT set, or ftype == 0) is
 *          handled, anything else ends in error("todo: ...")
 * conf   - only conf->meta_char is read: a line belongs to the old header
 *          when it starts with this character
 *
 * Returns 0 on success and -1 when the input cannot be opened or its first
 * block cannot be read or is empty.  All other failures are reported through
 * error().  NOTE(review): the code assumes error() does not return - confirm.
 */
int reheader_file(const char *fname, const char *header, int ftype, tbx_conf_t *conf)
{
    if ( ftype & IS_TXT || !ftype )
    {
        BGZF *fp = bgzf_open(fname,"r");
        if ( !fp || bgzf_read_block(fp) != 0 || !fp->block_length )
        {
            // Do not leak the handle when the file opened but its first block is unusable
            if ( fp ) bgzf_close(fp);
            return -1;
        }

        // NOTE(review): buffer is taken once and reused after later bgzf_read_block()
        // calls, i.e. the block is assumed to be refilled in place - confirm.
        char *buffer = fp->uncompressed_block;
        int skip_until = 0;

        // Skip the header: find out the position of the data block
        if ( buffer[0]==conf->meta_char )
        {
            skip_until = 1;
            while (1)
            {
                if ( buffer[skip_until]=='\n' )
                {
                    skip_until++;
                    // The next line starts in the following block, load it
                    if ( skip_until>=fp->block_length )
                    {
                        if ( bgzf_read_block(fp) != 0 || !fp->block_length ) error("FIXME: No body in the file: %s\n", fname);
                        skip_until = 0;
                    }
                    // The header has finished
                    if ( buffer[skip_until]!=conf->meta_char ) break;
                }
                skip_until++;
                // Ran off the end of the current block in the middle of a header line
                if ( skip_until>=fp->block_length )
                {
                    if (bgzf_read_block(fp) != 0 || !fp->block_length) error("FIXME: No body in the file: %s\n", fname);
                    skip_until = 0;
                }
            }
        }

        // Output the new header
        FILE *hdr  = fopen(header,"r");
        if ( !hdr ) error("%s: %s", header,strerror(errno));
        const size_t page_size = 32768;
        char *buf = malloc(page_size);
        if ( !buf ) error("Failed to allocate %d bytes\n", (int)page_size);
        BGZF *bgzf_out = bgzf_open("-", "w");
        if ( !bgzf_out ) error("Failed to open \"-\" for writing\n");
        ssize_t nread;
        // One byte of buf is kept free so that a missing final newline can be appended
        while ( (nread=fread(buf,1,page_size-1,hdr))>0 )
        {
            if ( nread<page_size-1 && buf[nread-1]!='\n' ) buf[nread++] = '\n';
            if (bgzf_write(bgzf_out, buf, nread) < 0) error("Error: %d\n",bgzf_out->errcode);
        }
        if ( fclose(hdr) ) error("close failed: %s\n", header);

        // Output all remaining data read with the header block
        if ( fp->block_length - skip_until > 0 )
        {
            // Report the error code of the stream that failed, i.e. the output one
            if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0) error("Error: %d\n",bgzf_out->errcode);
        }
        if (bgzf_flush(bgzf_out) < 0) error("Error: %d\n",bgzf_out->errcode);

        // Pass the rest of the input through bgzf_raw_read/bgzf_raw_write, one page at a time
        while (1)
        {
            nread = bgzf_raw_read(fp, buf, page_size);
            if ( nread<=0 ) break;

            int count = bgzf_raw_write(bgzf_out, buf, nread);
            if (count != nread) error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread);
        }
        if (bgzf_close(bgzf_out) < 0) error("Error: %d\n",bgzf_out->errcode);
        if (bgzf_close(fp) < 0) error("Error: %d\n",fp->errcode);
        free(buf);
    }
    else
        error("todo: reheader BCF, BAM\n");  // BCF is difficult, records contain pointers to the header.
    return 0;
}