コード例 #1
0
ファイル: thetaStat.cpp プロジェクト: Arhodes-CGRB-OSU/angsd
/**
 * Serialise the theta estimates of one chromosome to a BGZF stream.
 *
 * Records written, in order:
 *   size_t  length of the chromosome name
 *   char[]  chromosome name (no terminating NUL)
 *   size_t  number of sites
 *   int[]   position of every site
 *   float[] five arrays, one per statistic, each holding one value per site
 *
 * Side effect: the `vals` array of every element of `thetas` is released
 * with delete[]; the elements themselves stay in the vector.
 *
 * @param thetas per-site records (position + 5 values) for this chromosome
 * @param chr    NUL-terminated chromosome name
 * @param fp     BGZF handle open for writing
 * @return the bgzf_tell() offset at which this chromosome's record starts
 */
int64_t writeAll(std::vector<the_t> &thetas, char *chr,BGZF *fp){
  const size_t nSites = thetas.size();
  fprintf(stderr,"\tWriting: chr:%s with nSites:%zu\n",chr,nSites);
  int64_t retVal =bgzf_tell(fp);
  size_t clen=strlen(chr);
  bgzf_write(fp,&clen,sizeof(size_t));//write len of chr
  bgzf_write(fp,chr,clen);//write chr
  size_t vLen = nSites;
  bgzf_write(fp,&vLen,sizeof(size_t));//write len of positions;

  // Transpose the per-site records into one column per statistic.
  // The column table is a plain local: the previous function-static heap
  // table was never released and made the function non-reentrant.
  const int nStats = 5;
  int *posi = new int[nSites];
  float *the[nStats];
  for(int j=0;j<nStats;j++)
    the[j] = new float[nSites];

  for(size_t i=0;i<nSites;i++){
    posi[i] =thetas[i].posi;
    for(int j=0;j<nStats;j++)
      the[j][i] = thetas[i].vals[j];
    delete [] thetas[i].vals;
  }

  bgzf_write(fp,posi,sizeof(int)*nSites);
  for(int j=0;j<nStats;j++){
    bgzf_write(fp,the[j],sizeof(float)*nSites);
    delete [] the[j];
  }

  delete [] posi;
  fprintf(stderr,"\tDone writing: %s\n",chr);
  return retVal;
}
コード例 #2
0
/**
 * Switch the per-chromosome count buffers to a new reference.
 *
 * When a chromosome is already active (curChr != -1) its four count arrays
 * are flushed to the BGZF data file and a matching entry (name length, name,
 * length, data offset) is appended to the index file. Afterwards the four
 * buffers are reallocated, zero-filled, for the length of the new reference.
 * Does nothing when doSmartCounts is 0.
 *
 * @param newRefId index of the reference that becomes current
 */
void abcSmartCounts::changeChr(int newRefId){
  if(doSmartCounts==0)
    return;

  if(curChr!=-1){
    const char *chrName = header->name[curChr];

    // data block: remember where it starts, then name length, name, len, counts
    int64_t dataOffset =bgzf_tell(fbin);
    int nameLen = strlen(chrName);
    bgzf_write(fbin,&nameLen,sizeof(int));
    bgzf_write(fbin,chrName,nameLen);
    bgzf_write(fbin,&len,sizeof(int));
    int base=0;
    while(base<4){
      bgzf_write(fbin,counts[base],len);
      base++;
    }

    // index entry pointing at the data block written above
    fprintf(stderr,"Writing index for chr: %s\n",chrName);
    fwrite(&nameLen,sizeof(int),1,fidx);
    fwrite(chrName,sizeof(char),nameLen,fidx);
    fwrite(&len,sizeof(int),1,fidx);
    fwrite(&dataOffset,sizeof(int64_t),1,fidx);
  }

  curChr = newRefId;
  len = header->l_ref[curChr];

  // fresh, zero-initialised buffers sized for the new reference
  for(int base=0;base<4;base++){
    delete [] counts[base];
    counts[base] = new unsigned char[len]();
  }
}
コード例 #3
0
ファイル: ifq.c プロジェクト: mfranberg/indexedfastq
/**
 * Scan a BGZF-compressed FASTQ file and record, for every record, the
 * bgzf_tell() offset just after its leading '@'.
 *
 * For each '@' found, the rest of the line is read as the accession, hashed
 * with cmph_search(), and the offset is stored in table[hash].
 * Scanning stops at end of file, on a read error, or when read_one_line()
 * does not return 1.
 *
 * @param table      output array indexed by the cmph hash of the accession
 * @param hash       perfect hash over the accessions
 * @param fastq_file BGZF handle open for reading
 */
void
populate_index(uint64_t *table, cmph_t *hash, BGZF *fastq_file)
{
    while( 1 )
    {
        /* Find @. bgzf_getc() returns an int so that its negative EOF/error
         * codes stay distinguishable from data bytes; keeping the result in
         * a char loses that distinction. */
        int c;
        do
        {
            c = bgzf_getc( fastq_file );
        } while( c != '@' && c >= 0 );

        if( c < 0 )
        {
            /* End of file or read error: no further record to index. */
            break;
        }

        long pos = bgzf_tell( fastq_file );
        if( pos == -1 )
        {
            break;
        }

        /* NOTE(review): ownership of the buffer returned through
         * `accession` is not visible here - confirm whether it must be
         * released after use. */
        char *accession = NULL;
        cmph_uint32 accession_length;
        if( read_one_line( &accession, &accession_length, fastq_file ) != 1 )
        {
            break;
        }

        /* Next char is sequence, save pos */
        unsigned int id = cmph_search( hash, accession, accession_length );
        table[ id ] = (uint64_t) pos;
    }
}
コード例 #4
0
/**
 * Flush the counts of the chromosome that is still active, then release the
 * count buffers and close the data and index files.
 *
 * Does nothing when doSmartCounts is 0. The flush is skipped when no
 * chromosome was ever made current (curChr == -1, the same sentinel that
 * changeChr() tests), because header->name[-1] would be read otherwise.
 */
abcSmartCounts::~abcSmartCounts(){

  if(doSmartCounts==0)
    return;

  if(curChr!=-1){
    int64_t retVal =bgzf_tell(fbin);
    int clen = strlen(header->name[curChr]);
    bgzf_write(fbin,&clen,sizeof(int));
    bgzf_write(fbin,header->name[curChr],clen);
    bgzf_write(fbin,&len,sizeof(int));
    for(int i=0;i<4;i++)
      bgzf_write(fbin,counts[i],len);//write the counts of this chr

    //write index stuff
    fwrite(&clen,sizeof(int),1,fidx);
    fwrite(header->name[curChr],sizeof(char),clen,fidx);
    fwrite(&len,sizeof(int),1,fidx);
    fwrite(&retVal,sizeof(int64_t),1,fidx);
  }

  for(int i=0;i<4;i++)
    delete [] counts[i];
  delete [] counts;

  fclose(fidx);
  bgzf_close(fbin);

}
コード例 #5
0
ファイル: BamFile.cpp プロジェクト: pezmaster31/pbbam
    BamFilePrivate(const std::string& fn)
        : filename_(fn)
        , firstAlignmentOffset_(-1)
    {
        // ensure we've updated htslib verbosity with requested verbosity here
        hts_verbose = ( PacBio::BAM::HtslibVerbosity == -1 ? 0 : PacBio::BAM::HtslibVerbosity);

        // attempt open
        auto f = RawOpen();

#if !defined (PBBAM_NO_CHECK_EOF) || PBBAM_AUTOVALIDATE
        // sanity check on file
        const int eofCheck = bgzf_check_EOF(f->fp.bgzf);
        if (eofCheck <= 0 ) {
            // 1:  EOF present & correct
            // 2:  not seekable (e.g. reading from stdin)
            // 0:  EOF absent
            // -1: some other error
            std::stringstream e;
            if (eofCheck == 0)
                e << fn << " : is missing EOF block" << std::endl;
            else
                e << fn << " : unknown error while checking EOF block" << std::endl;
            throw std::runtime_error(e.str());
        }
#endif

        // attempt fetch header
        std::unique_ptr<bam_hdr_t, internal::HtslibHeaderDeleter> hdr(sam_hdr_read(f.get()));
        header_ = internal::BamHeaderMemory::FromRawData(hdr.get());

        // cache first alignment offset
        firstAlignmentOffset_ = bgzf_tell(f->fp.bgzf);
    }
コード例 #6
0
ファイル: read_data.cpp プロジェクト: mfumagalli/ngsF
/**
 * Load one chunk of sites into chunk_data.
 *
 * With _USE_BGZF the sites are read from pars->in_glf_fh starting at the
 * stored virtual offset of the chunk, and the offset of the following chunk
 * is recorded if it is still unknown (0). Without _USE_BGZF the rows simply
 * point into pars->data.
 *
 * @param chunk_data one row per site; each row holds n_ind * 3 doubles
 * @param pars       run parameters (chunk geometry, file handle, offsets)
 * @param chunk      zero-based chunk number, must be < pars->n_chunks
 * @return number of sites read
 */
uint64_t read_chunk(double **chunk_data, params *pars, uint64_t chunk) {
	uint64_t total_elems_read = 0;

	if(chunk >= pars->n_chunks)
		error("invalid chunk number!");

	// Define chunk start and end positions
	uint64_t start_pos = chunk * pars->max_chunk_size;
	uint64_t end_pos = start_pos + pars->max_chunk_size - 1;
	if(end_pos >= pars->n_sites)	end_pos = pars->n_sites - 1;
	uint64_t chunk_size = end_pos - start_pos + 1;
	// uint64_t is not `unsigned long` on every platform, so cast explicitly
	if( pars->verbose >= 6 ) printf("\tReading chunk %llu from position %llu to %llu (%llu)\n", (unsigned long long) (chunk+1), (unsigned long long) start_pos, (unsigned long long) end_pos, (unsigned long long) chunk_size);

	// Search start position
#ifdef _USE_BGZF
	if( bgzf_seek(pars->in_glf_fh, pars->chunks_voffset[chunk], SEEK_SET) < 0 )
		error("cannot seek GLF file (BGZF)!");
#endif

	// Read data from file
	for(uint64_t c = 0; c < chunk_size; c++) {
#ifdef _USE_BGZF
		int bytes_read = bgzf_read(pars->in_glf_fh, chunk_data[c], (int) pars->n_ind * 3 * sizeof(double));
		uint64_t elems_read = (uint64_t) bytes_read / sizeof(double);
		// Validate the read before touching the buffer: a short or failed
		// read must not be handed to the genotype caller.
		if( elems_read != pars->n_ind * 3 )
			error("cannot read GLF file!");
		if(pars->call_geno)
			call_geno(chunk_data[c], pars->n_ind, 3);
#else
		chunk_data[c] = pars->data[start_pos+c];
		uint64_t elems_read = pars->n_ind * 3;
#endif
		total_elems_read += elems_read;
	}

#ifdef _USE_BGZF
	// Update index for next chunk
	if( chunk+1 != pars->n_chunks && pars->chunks_voffset[chunk+1] == 0 )
		pars->chunks_voffset[chunk+1] = bgzf_tell(pars->in_glf_fh);
#endif

	return( total_elems_read/(pars->n_ind * 3) );
}
コード例 #7
0
/**
 * Open a BGZF stream.
 *
 * A filename of "-" selects stdout when the mode starts with 'w'/'W' and
 * stdin when it starts with 'r'/'R'; any other combination is opened by
 * name. For read mode, when ourRequireEofBlock is set and bgzf_check_EOF()
 * returns 0, a message is printed and the handle is closed again. Otherwise
 * the current position is stored as the start position.
 *
 * @param filename path to open, or "-" for a standard stream
 * @param mode     open mode; only its first character is inspected here
 */
BgzfFileType::BgzfFileType(const char * filename, const char * mode)
{
    const char modeChar = mode[0];
    const bool openForWrite = (modeChar == 'w') || (modeChar == 'W');
    const bool openForRead = (modeChar == 'r') || (modeChar == 'R');

    if((openForWrite || openForRead) && (strcmp(filename, "-") == 0))
    {
        // "-" means the standard stream that matches the direction.
        FILE* standardStream = openForWrite ? stdout : stdin;
        bgzfHandle = bgzf_fdopen(fileno(standardStream), mode);
    }
    else
    {
        bgzfHandle = bgzf_open(filename, mode);
    }

    myStartPos = 0;
    if (bgzfHandle != NULL)
    {
        // Only readers can be asked to insist on the trailing EOF block.
        const bool eofBlockMissing = openForRead && ourRequireEofBlock &&
            (bgzf_check_EOF(bgzfHandle) == 0);

        if (!eofBlockMissing)
        {
            // Properly formatted file: remember where the data starts.
            myStartPos = bgzf_tell(bgzfHandle);
        }
        else
        {
            std::cerr << "BGZF EOF marker is missing in " << filename << std::endl;
            // the block is supposed to be there, but isn't, so close the file.
            close();
        }
    }

    myEOF = false;
}
コード例 #8
0
ファイル: prep_sites.cpp プロジェクト: SamueleSoraggi/angsd
//return zero if fine.
int writeDat(char *last,mmap &mm,tary<char> *keep,tary<char> *major,tary<char> *minor,BGZF *BFP,FILE *fp,int doCompl){
  assert(last!=NULL);
  if((major!=NULL) ^ (minor!=NULL)){
    fprintf(stderr,"major and minor should be the same\n");
    return 1;
  }
  int hasMajMin =0;
  if(major!=NULL)
    hasMajMin =1;
  fprintf(stderr,"\t-> Writing chr:\'%s\' \n",last);
  mmap::iterator it=mm.find(last);
  if(it!=mm.end()){
    return 1;
  }else
    mm[strdup(last)]=1;
  //write data and index stuff
  int64_t retVal =bgzf_tell(BFP);//now contains the offset to which we should point.
  
  //write chrname
  int clen=strlen(last)+1;
  fwrite(&clen,1,sizeof(int),fp);
  fwrite(last,clen,sizeof(char),fp);
  
  fwrite(&retVal,1,sizeof(int64_t),fp);
  for(int i=0;doCompl&&i<keep->l;i++)
    if(keep->d[i]==0)
      keep->d[i]=1;
    else
      keep->d[i]=0;
  

  fwrite(&keep->l,sizeof(size_t),1,fp);//write len of chr
  fwrite(&hasMajMin,1,sizeof(int),fp);
  aio::bgzf_write(BFP,keep->d,keep->l);//write keep
  if(hasMajMin){
    aio::bgzf_write(BFP,major->d,major->l);//write maj
    aio::bgzf_write(BFP,minor->d,minor->l);//write min
  }
  return 0;
}
コード例 #9
0
ファイル: realSFS.cpp プロジェクト: plibrado/angsd
int fst_index(int argc,char **argv){
  if(argc<1){
    fprintf(stderr,"Must supply afile.saf.idx [chrname, write more info]\n");
    return 0; 
  }
  args *arg = getArgs(argc,argv);
  if(!arg->fstout){
    fprintf(stderr,"\t-> Must supply -fstout for doing fstindex\n");
    return 0;
  }

  std::vector<persaf *> &saf =arg->saf;
  //assert(saf.size()==2);
  size_t nSites = arg->nSites;
  if(nSites == 0){//if no -nSites is specified
    nSites = 100000;//<- set default to 100k sites, no need to load everything...
    // nSites=nsites(saf,arg);
  }
  fprintf(stderr,"\t-> nSites: %lu\n",nSites);
  std::vector<Matrix<float> *> gls;
  for(int i=0;i<saf.size();i++)
    gls.push_back(alloc<float>(nSites,saf[i]->nChr+1));

  //  int ndim= parspace(saf);
  if(arg->sfsfname.size()!=choose(saf.size(),2)){
    fprintf(stderr,"\t-> You have supplied: %lu populations, that is %d pairs\n",saf.size(),choose(saf.size(),2));
    fprintf(stderr,"\t-> You therefore need to supply %d 2dsfs priors instead of:%lu\n",choose(saf.size(),2),arg->sfsfname.size());
    exit(0);
  }
  std::vector<double *> sfs;
  int inc =0;
  for(int i=0;i<saf.size();i++)
    for(int j=i+1;j<saf.size();j++){
      size_t pairdim = (saf[i]->nChr+1)*(saf[j]->nChr+1);
      double *ddd=new double[pairdim];
      readSFS(arg->sfsfname[inc],pairdim,ddd);
      normalize(ddd,pairdim);
      sfs.push_back(ddd);
      inc++;
    }

  
  double **a1,**b1;
  a1=new double*[choose(saf.size(),2)];
  b1=new double*[choose(saf.size(),2)];
  inc=0;
  for(int i=0;i<saf.size();i++)
    for(int j=i+1;j<saf.size();j++){
      calcCoef((int)saf[i]->nChr,(int)saf[j]->nChr,&a1[inc],&b1[inc]);
      //      fprintf(stderr,"a1[%d]:%p b1[%d]:%p\n",inc,&a1[inc][0],inc,&b1[inc][0]);
      inc++;
    }

  BGZF *fstbg = openFileBG(arg->fstout,".fst.gz");
  FILE *fstfp = openFile(arg->fstout,".fst.idx");
  char buf[8]="fstv1";
  bgzf_write(fstbg,buf,8);    
  fwrite(buf,1,8,fstfp);
#if 0
  for(int i=0;i<ndim;i++)
    fprintf(stdout,"%f %f\n",a1[i],b1[i]);
  exit(0);
#endif
#if 1
  size_t nsafs=saf.size();
  fwrite(&nsafs,sizeof(size_t),1,fstfp);
  for(int i=0;i<nsafs;i++){
    size_t clen= strlen(saf[i]->fname);
    fwrite(&clen,sizeof(size_t),1,fstfp);
    fwrite(saf[i]->fname,1,clen,fstfp);
  }
#endif
  int asdf = choose(saf.size(),2);
  std::vector<double> *ares = new std::vector<double> [choose(saf.size(),2)];
  std::vector<double> *bres = new std::vector<double> [choose(saf.size(),2)];
  //  for(int i=0;i<3;i++)
    //    fprintf(stderr,"ares.size():%lu bres.size():%lu sfs:%p\n",ares[i].size(),bres[i].size(),&sfs[i][0]);
  std::vector<int> posi;
  setGloc(saf,nSites);
  int *posiToPrint = new int[nSites];
  for(myMap::iterator it = saf[0]->mm.begin();it!=saf[0]->mm.end();++it) {
    //    fprintf(stderr,"doing chr:%s\n",it->first);
    if(arg->chooseChr!=NULL){
      it = saf[0]->mm.find(arg->chooseChr);
      if(it==saf[0]->mm.end()){
	fprintf(stderr,"Problem finding chr: %s\n",arg->chooseChr);
	break;
      }
    }
    for(int i=0;i<choose(saf.size(),2);i++){
      ares[i].clear();
      bres[i].clear();
    }
    posi.clear();
    while(1) {
      int ret=readdata(saf,gls,nSites,it->first,arg->start,arg->stop,posiToPrint,NULL);//read nsites from data
      //  fprintf(stderr,"ret:%d glsx:%lu\n",ret,gls[0]->x);
      //if(gls[0]->x!=nSites&&arg->chooseChr==NULL&&ret!=-3){
	//fprintf(stderr,"continue continue\n");
      //	continue;
      //}
      
      fprintf(stderr,"\t-> Will now do fst temp dump using a chunk of %lu\n",gls[0]->x);
      int inc=0;
      for(int i=0;i<saf.size();i++)
	for(int j=i+1;j<saf.size();j++){
	  //	  fprintf(stderr,"i:%d j:%d inc:%d gls[i]:%p gls[j]:%p sfs:%p a1:%p b1:%p\n",i,j,inc,gls[i],gls[j],sfs[i],&a1[inc][0],&a1[inc][0]);
	  block_coef(gls[i],gls[j],sfs[inc],a1[inc],b1[inc],ares[inc],bres[inc]);
	  inc++;
	}
      for(int i=0;i<gls[0]->x;i++)
	posi.push_back(posiToPrint[i]);

      for(int i=0;i<gls.size();i++)
	gls[i]->x =0;
      if(ret==-2)//no more data in files or in chr, eith way we break;
	break;
    }
    size_t clen = strlen(it->first);
    fwrite(&clen,sizeof(size_t),1,fstfp);
    fwrite(it->first,1,clen,fstfp);
    size_t nit=posi.size();

    assert(1==fwrite(&nit,sizeof(size_t),1,fstfp));
    int64_t tell = bgzf_tell(fstbg);
    fwrite(&tell,sizeof(int64_t),1,fstfp);
    bgzf_write(fstbg,&posi[0],posi.size()*sizeof(int));
    int inc =0;
    for(int i=0;i<saf.size();i++)
      for(int j=i+1;j<saf.size();j++){
	bgzf_write(fstbg,&(ares[inc][0]),ares[inc].size()*sizeof(double));
	bgzf_write(fstbg,&(bres[inc][0]),bres[inc].size()*sizeof(double));
	inc++;
      }
    if(arg->chooseChr!=NULL)
      break;
  }
  delGloc(saf,nSites);
  destroy(gls,nSites);
  destroy_args(arg);
  for(int i=0;i<sfs.size();i++)
    delete [] sfs[i];
#if 0
  fprintf(stderr,"\n\t-> NB NB output is no longer log probs of the frequency spectrum!\n");
  fprintf(stderr,"\t-> Output is now simply the expected values! \n");
  fprintf(stderr,"\t-> You can convert to the old format simply with log(norm(x))\n");
#endif
  bgzf_close(fstbg);
  fclose(fstfp);
  fprintf(stderr,"\t-> fst index finished with no errors!\n");
  return 0;
}
コード例 #10
0
ファイル: thrash_threads6.c プロジェクト: atks/vt
/*
 * Stress test for BGZF seeking and multi-threaded reading.
 *
 * First locates two valid virtual offsets in the input (one after 100 reads
 * of 64 KiB, one at end of file), then opens the file 1000 times and on
 * each handle performs 80 randomly chosen operations: seek to start / middle
 * / end, read a random amount, sleep, or enable threaded decoding.
 * Aborts on any unexpected read result or close failure.
 *
 * Usage: thrash_threads6 input.bam
 */
int main(int argc, char *argv[]) {
    if (argc <= 1) {
        fprintf(stderr, "Usage: thrash_threads6 input.bam\n");
        exit(1);
    }

    // Find a valid seek location ~64M into the file
    int i;
    ssize_t got;
    BGZF *fpin  = bgzf_open(argv[1], "r");
    if (!fpin) {
        // bgzf_open returns NULL on failure; do not read through it
        fprintf(stderr, "Failed to open %s\n", argv[1]);
        exit(1);
    }
    uint64_t upos = 0, uend = 0;
    char buf[100000];
    for (i = 0; i < 100; i++) {
        if ((got = bgzf_read(fpin, buf, 65536)) < 0)
            abort();
        upos += got;
    }
    int64_t pos = bgzf_tell(fpin);
    while ((got = bgzf_read(fpin, buf, 65536)) > 0) {
        uend += got;
    }
    if (got < 0) abort();
    int64_t end = bgzf_tell(fpin);
    bgzf_close(fpin);

    // Ensure input is big enough to avoid case 3,4 below going off the end
    // of the file
    if (uend < upos + 10000000) {
        fprintf(stderr, "Please supply a bigger input file\n");
        exit(1);
    }

#define N 1000

    // Spam random seeks & reads
    for (i = 0; i < 1000; i++) {
        printf("i=%d\t", i);
        fpin  = bgzf_open(argv[1], "r");
        if (!fpin) {
            fprintf(stderr, "Failed to open %s\n", argv[1]);
            exit(1);
        }
        int j, eof = 0, mt = 0;
        for (j = 0; j < 80; j++) {
            int n = rand() % 7;
            putchar('0'+n); fflush(stdout);
            switch (n) {
            case 0: // start
                if (bgzf_seek(fpin, 0LL, SEEK_SET) < 0) puts("!");//abort();
                eof = 0;
                break;
            case 1: // mid
                if (bgzf_seek(fpin, pos, SEEK_SET) < 0) puts("!");//abort();
                eof = 0;
                break;
            case 2: // eof
                if (bgzf_seek(fpin, end, SEEK_SET) < 0) puts("!");//abort();
                eof = 1;
                break;
            case 3: case 4: {
                // at EOF a read must return 0, elsewhere the full length
                int l = rand()%(n==3?100000:100);
                if (bgzf_read(fpin, buf, l) != l*(1-eof)) abort();
                break;
            }
            case 5:
                usleep(N);
                break;
            case 6:
                // enable threaded decoding once per handle
                if (!mt)
                    bgzf_mt(fpin, 8, 256);
                mt = 1;
                break;
            }
        }
        printf("\n");
        if (bgzf_close(fpin))
            abort();
    }

    return 0;
}
コード例 #11
0
ファイル: bgzf_stubs.c プロジェクト: mlin/ocaml-bgzf
/* OCaml stub: returns the bgzf_tell() position of the wrapped BGZF handle
 * as a boxed int64. */
value caml_bgzf_tell(value bgzf) {
	CAMLparam1(bgzf);
	int64_t position = bgzf_tell(BGZF_val(bgzf));
	CAMLreturn(copy_int64(position));
}
コード例 #12
0
/**
 * Create single chromosome index file
 * the file content is a 2-column matrix of int64_t type
 * line1:  num_sample  num_marker
 * line2:  0           bgzf_offset_for_#CHROM_line
 * line3:  var_1_pos   bgzf_offset_for_var_1
 * ...
 *
 * The BCF is read from fBcfFile_ (rewound to the start) and the index is
 * written to indexFile_. The first line is written twice: once as a
 * placeholder and again at the end when the marker count is known.
 *
 * @return 0 on success; -1 if the magic number or header cannot be read or
 *         parsed, the "#CHROM" line is missing, the offsets are
 *         inconsistent, or the index file cannot be created.
 */
int SingleChromosomeBCFIndex::createIndex() {
  // const char* fn = bcfFile_.c_str();
  BGZF* fp = fBcfFile_;  // bgzf_open(fn, "rb");
  bgzf_seek(fp, 0, SEEK_SET);

  // check magic number
  char magic[5];
  if (5 != bgzf_read(fp, magic, 5)) {
    return -1; // exit(1);
  }
  if (!(magic[0] == 'B' && magic[1] == 'C' && magic[2] == 'F' &&
        magic[3] == 2 && (magic[4] == 1 || magic[4] == 2))) {
    return -1; // exit(1);
  }

  // read header
  uint32_t l_text;
  if (4 != bgzf_read(fp, &l_text, 4)) {
    return -1; // exit(1);
  }
  Rprintf("l_text = %d\n", l_text);

  std::string s;
  int64_t bgzf_offset_before_header = bgzf_tell(fp); // the beginning of header block
  s.resize(l_text);
  if (bgzf_read(fp, (void*)s.data(), l_text) != static_cast<int64_t>(l_text)) {
    // a truncated header cannot be parsed reliably, so stop here
    REprintf( "Read failed!\n");
    return -1;
  }
  BCFHeader bcfHeader;
  if (bcfHeader.parseHeader(s,
                  &bcfHeader.header_contig_id,
                  &bcfHeader.header_id,
                  &bcfHeader.header_number,
                  &bcfHeader.header_type,
                  &bcfHeader.header_description)) {
    REprintf( "Parse header failed!\n");
    return -1; // exit(1);
  }

  // locate #CHROM line
  int64_t bgzf_offset_after_header = bgzf_tell(fp); // the end of header block
  size_t ptr_chrom_line = s.find("#CHROM"); // the index of "#CHROM", also the size between beginning of header to '#CHROM'
  if (ptr_chrom_line == std::string::npos) {
    REprintf( "Cannot find the \"#CHROM\" line!\n");
    return -1; // exit(1);
  }
  Rprintf("offset_header = %d\n", (int) ptr_chrom_line);

  // Re-read the header in two pieces so that bgzf_tell() yields the virtual
  // offset of the "#CHROM" line itself.
  bgzf_seek(fp, bgzf_offset_before_header, SEEK_SET); // rewind fp to the beginning of header
  s.resize(ptr_chrom_line);
  const int64_t before_chrom_size = bgzf_read(fp, (void*) s.data(), ptr_chrom_line);
  if (before_chrom_size != static_cast<int64_t>(ptr_chrom_line)) {
    REprintf( "Read failed!\n");
    return -1;
  }
  int64_t bgzf_offset_before_chrom = bgzf_tell(fp); // the offset to #CHROM
  s.resize(l_text - before_chrom_size);
  const int64_t after_chrom_size = bgzf_read(fp, (void*) s.data(), l_text - before_chrom_size);
  if (after_chrom_size <= 0) {
    REprintf( "Read failed!\n");
    return -1;
  }
  // remember the final byte now: the trimming below shortens s
  const int last_header_char = s[after_chrom_size - 1];

  // load sample names
  while (!s.empty() && (s.back() == '\n' || s.back() == '\0')) {
    s.resize(s.size() - 1);
  }
  stringTokenize(s, "\t", &bcfHeader.sample_names);
  const int64_t num_sample = (int)bcfHeader.sample_names.size() - 9; // vcf header has 9 columns CHROM...FORMAT before actual sample names
  Rprintf("sample size = %ld\n", static_cast<long>(num_sample));
  Rprintf("last character is s[after_chrom_size-1] = %d\n", last_header_char); // should be 0, the null terminator character
  // quality check
  if (bgzf_offset_after_header != bgzf_tell(fp)) {
    REprintf( "Messed up bgzf header\n");
    return -1; // exit(1);
  }

  // create index file
  FILE* fIndex = fopen(indexFile_.c_str(), "wb");
  if (fIndex == NULL) {
    REprintf( "Cannot create index file [ %s ]\n", indexFile_.c_str());
    return -1;
  }
  int64_t num_marker = 0;
  int64_t pos = 0;
  // placeholder counts; rewritten once all records have been seen
  fwrite(&num_sample, sizeof(int64_t), 1, fIndex);
  fwrite(&num_marker, sizeof(int64_t), 1, fIndex);
  fwrite(&pos, sizeof(int64_t), 1, fIndex);
  fwrite(&bgzf_offset_before_chrom, sizeof(int64_t), 1, fIndex);

  uint32_t l_shared;
  uint32_t l_indiv;
  std::vector<char> data;
  int64_t offset;
  do {
    offset = bgzf_tell(fp);
    if (4 != bgzf_read(fp, &l_shared, sizeof(uint32_t))) {
      break; // REprintf( "Wrong read!\n"); exit(1);
    }
    if (4 != bgzf_read(fp, &l_indiv, sizeof(uint32_t))) {
      break; // REprintf( "Wrong read!\n"); exit(1);
    }
    const size_t record_len = static_cast<size_t>(l_shared) + l_indiv;
    data.resize(record_len);
    if (static_cast<int64_t>(record_len) != bgzf_read(fp, data.data(), record_len * sizeof(char))) {
      break; // REprintf( "Wrong read!\n"); exit(1);
    }
    if (record_len < 8) {
      break; // too short to hold the 4-byte field at offset 4
    }
    // the position is the 4-byte field at byte offset 4 of the record;
    // widen it through a 32-bit temporary so the upper half of pos is
    // well-defined regardless of byte order
    uint32_t pos32;
    memcpy(&pos32, data.data() + 4, sizeof(pos32));
    pos = pos32;
    fwrite(&pos, sizeof(int64_t), 1, fIndex);
    fwrite(&offset, sizeof(int64_t), 1, fIndex);

    num_marker++;
    if (num_marker % 10000 == 0) {
      Rprintf("\rprocessed %ld markers", static_cast<long>(num_marker));
    }
  } while (true);

  // go back and store the real counts in the first line
  if (fseek(fIndex, 0, SEEK_SET)) {
    REprintf( "fseek failed\n!");
  }
  fwrite(&num_sample, sizeof(int64_t), 1, fIndex);
  fwrite(&num_marker, sizeof(int64_t), 1, fIndex);
  fclose(fIndex);
  Rprintf("Indexing finished with %ld samples and %ld markers\n", static_cast<long>(num_sample), static_cast<long>(num_marker));

  return 0;
}