Exemplo n.º 1
0
/*
 * Count the FASTQ records in an already-open BGZF stream.
 *
 * The stream is rewound to offset 0 before the scan and again before
 * returning, so the handle is handed back positioned at the start.
 *
 * A record is counted for each line that begins with '@' and sits at a
 * header position (line index 0, 4, 8, ...). Counting every '@' byte, as
 * the previous implementation did, over-counts: '@' is a legal quality
 * character (Phred+33 score 31) and can even start a quality line.
 * NOTE(review): this assumes the four-lines-per-record FASTQ layout with
 * no wrapped sequence lines and no blank lines between records - confirm
 * against the inputs this index is expected to handle.
 *
 * A failed or empty read ends the scan; the count accumulated so far is
 * returned.
 */
size_t count_fastq_sequences(BGZF *fastq_file)
{
    size_t count = 0;
    size_t line_index = 0;     /* 0-based index of the line being scanned */
    int at_line_start = 1;     /* non-zero while the next byte starts a line */
    char buffer[ BUFSIZ ];

    bgzf_seek( fastq_file, 0, SEEK_SET );

    for( ;; )
    {
        ssize_t i;
        ssize_t bytes_read = bgzf_read( fastq_file, buffer, BUFSIZ );
        if( bytes_read <= 0 )
        {
            break;
        }

        for( i = 0; i < bytes_read; i++ )
        {
            const char c = buffer[ i ];

            if( at_line_start && c == '@' && ( line_index % 4 ) == 0 )
            {
                count++;
            }

            if( c == '\n' )
            {
                line_index++;
                at_line_start = 1;
            }
            else
            {
                at_line_start = 0;
            }
        }
    }

    bgzf_seek( fastq_file, 0, SEEK_SET );
    return count;
}
Exemplo n.º 2
0
/*
 * Look up `query` through the index hash and load the matching record.
 *
 * The hash value selects a slot in index->table; the value stored there is
 * used as the seek target in the FASTQ file. The line found at that
 * position is read into record->name and compared against `query`; only on
 * a match are the following three lines read.
 *
 * Returns IFQ_OK on a match, IFQ_NOT_FOUND when the seek fails or the name
 * read from the file does not match.
 *
 * NOTE(review): the last two reads both target record->quality, so the
 * first of them (presumably the '+' separator line) is overwritten by the
 * second. Whether that releases or leaks the earlier buffer depends on
 * read_one_line, which is not visible here - confirm.
 */
ifq_codes_t
ifq_query_index(ifq_index_t *index, char *query, ifq_record_t *record)
{
    cmph_uint32 line_length;

    /* Hash the key, then jump to the offset stored for that slot. */
    unsigned int slot = cmph_search( index->hash, query, (cmph_uint32) strlen( query ) );
    uint64_t offset = index->table[ slot ];

    if( bgzf_seek( index->fastq_file, offset, SEEK_SET ) < 0 )
        return IFQ_NOT_FOUND;

    /* The stored line must really be the requested key. */
    read_one_line( &record->name, &line_length, index->fastq_file );
    if( strncmp( record->name, query, line_length ) != 0 )
        return IFQ_NOT_FOUND;

    read_one_line( &record->sequence, &line_length, index->fastq_file );
    read_one_line( &record->quality, &line_length, index->fastq_file );
    read_one_line( &record->quality, &line_length, index->fastq_file );

    return IFQ_OK;
}
// Opens every input BAM file, optionally seeks each one to a caller-supplied
// offset, and primes the merge queue with the first record of each file.
//
// bam_fnames   - paths of the BAM files to merge; an empty list leaves the
//                object with no sources and returns immediately.
// file_offsets - per-file seek offsets; only honoured when its size equals
//                bam_fnames.size(), and only entries > 0 trigger a seek.
//
// Exits the process when a file cannot be opened or when no file yields a
// record. A file that opens but has no record to contribute is now closed
// right away; previously its handle was dropped without being closed.
BamMerge::BamMerge(const vector<string>& bam_fnames,
		   vector<int64_t> file_offsets) :
  _bam_fnames(bam_fnames),
  _lines(less_bam(true)),
  _last_id(0)
{
  if (bam_fnames.empty())
    return;

  // Offsets are only meaningful when there is exactly one per file.
  const bool use_offsets = (bam_fnames.size() == file_offsets.size());

  for (size_t i = 0; i < _bam_fnames.size(); ++i) {
    const char* fname = _bam_fnames[i].c_str();
    samfile_t* fp = samopen(fname, "rb", 0);
    if (fp==0) {
      warn_msg(ERR_BAM_OPEN, fname);
      exit(1);
    }

    if (use_offsets && file_offsets[i] > 0)
      bgzf_seek(fp->x.bam, file_offsets[i], SEEK_SET);

    bam1_t* b = bam_init1();
    if (samread(fp, b) > 0) {
      _src_files.push_back(fp);
      // _lines.size() equals the index fp just received in _src_files,
      // because both containers grow together on this path only.
      CBamLine brec(_lines.size(), b, fp->header);
      _lines.push(brec);
    }
    else {
      // Nothing to merge from this file: release the record and the handle.
      bam_destroy1(b);
      samclose(fp);
    }
  }

  if (_lines.size() == 0) {
    warn_msg("Warning: no input BAM records found.\n");
    exit(1);
  }
}
Exemplo n.º 4
0
// Prints the per-chromosome records of a binary/index file pair to stdout.
//
// argv[0] is the base name; the BIN and IDX suffixes are appended to it to
// form the two file names. The only option is "-r chrName", which seeks to
// that chromosome's offset from the index and prints that record only.
//
// Returns 0. Usage and lookup problems terminate the process via exit(0),
// matching the rest of this file.
int print(int argc, char**argv){
  if(argc==0){
    fprintf(stderr,"print FILE [-r chrName]\n");
    exit(0);
  }
  char *base = *argv;
  char* outnames_bin = append(base,BIN);
  char* outnames_idx = append(base,IDX);
  fprintf(stderr,"Assuming binfile:%s and indexfile:%s\n",outnames_bin,outnames_idx);

  myMap mm = getMap(outnames_idx);
  writemap(stderr,mm);
  BGZF *fp = bgzf_open(outnames_bin,"r");
  if(fp==NULL){
    fprintf(stderr,"Problem opening file: %s\n",outnames_bin);
    exit(0);
  }

  --argc;++argv;
  char *chr=NULL;

  // Options come as "flag value" pairs.
  for(int argP=0;argP<argc;argP+=2){
    if(strcmp("-r",argv[argP])!=0){
      fprintf(stderr,"Unknown argument:%s\n",argv[argP]);
      exit(0);
    }
    // The flag must be followed by a value; the old check (argP==argc) could
    // never fire inside the loop, so a trailing "-r" read argv[argc].
    if(argP+1>=argc){
      fprintf(stderr,"incomplete arguments list\n");
      exit(0);
    }
    chr = argv[argP+1];
  }

  if(chr!=NULL){
    myMap::iterator it = mm.find(chr);
    if(it==mm.end()){
      fprintf(stderr,"Problem finding chr: %s in index\n",chr);
      exit(0);
    }
    datum d = it->second;
    bgzf_seek(fp,d.fpos,SEEK_SET);
  }

  while(1){
    perChr pc = getPerChr(fp);
    if(pc.nSites==0)
      break;
    fprintf(stderr,"pc.chr=%s pc.nSites=%zu firstpos=%d lastpos=%d\n",pc.chr,pc.nSites,pc.posi[0],pc.posi[pc.nSites-1]);
    print_main(pc,stdout);
    // Release the record before leaving the loop as well, so the
    // single-chromosome path no longer skips the cleanup.
    dalloc(pc);
    if(chr!=NULL)
      break;
  }

  return 0;
}
Exemplo n.º 5
0
/*
 * Load one chunk of per-site data into chunk_data.
 *
 * chunk_data - array of row pointers; row c receives (or, without BGZF, is
 *              pointed at) the data for site start_pos + c.
 * pars       - run parameters; supplies chunk geometry, the input handle
 *              and, with _USE_BGZF, the per-chunk virtual offsets.
 * chunk      - 0-based chunk number; must be < pars->n_chunks.
 *
 * Each site holds pars->n_ind * 3 doubles. With _USE_BGZF the data is read
 * from pars->in_glf_fh after seeking to the chunk's stored offset, and the
 * offset of the following chunk is recorded if it is still 0. Without it
 * the rows alias pars->data.
 *
 * Returns the number of sites read. Problems are reported through error().
 *
 * Each row is now validated before call_geno() touches it; previously
 * call_geno() ran on the buffer even when the read had failed or was short.
 */
uint64_t read_chunk(double **chunk_data, params *pars, uint64_t chunk) {
	uint64_t total_elems_read = 0;

	if(chunk >= pars->n_chunks)
		error("invalid chunk number!");

	// Define chunk start and end positions
	uint64_t start_pos = chunk * pars->max_chunk_size;
	uint64_t end_pos = start_pos + pars->max_chunk_size - 1;
	if(end_pos >= pars->n_sites)	end_pos = pars->n_sites - 1;
	uint64_t chunk_size = end_pos - start_pos + 1;
	if( pars->verbose >= 6 ) printf("\tReading chunk %lu from position %lu to %lu (%lu)\n", chunk+1, start_pos, end_pos, chunk_size);

	// Search start position
#ifdef _USE_BGZF
	if( bgzf_seek(pars->in_glf_fh, pars->chunks_voffset[chunk], SEEK_SET) < 0 )
		error("cannot seek GLF file (BGZF)!");
#endif

	// Read data from file
	for(uint64_t c = 0; c < chunk_size; c++) {
#ifdef _USE_BGZF
		int bytes_read = bgzf_read(pars->in_glf_fh, chunk_data[c], (int) pars->n_ind * 3 * sizeof(double));
		// A negative result is a read failure: count it as zero elements
		// instead of relying on unsigned wrap-around to trip the check below.
		uint64_t elems_read = bytes_read > 0 ? (uint64_t) bytes_read / sizeof(double) : 0;
#else
		chunk_data[c] = pars->data[start_pos+c];
		uint64_t elems_read = pars->n_ind * 3;
#endif
		if( elems_read != pars->n_ind * 3 )
			error("cannot read GLF file!");
#ifdef _USE_BGZF
		// Only post-process rows that were read completely.
		if(pars->call_geno)
			call_geno(chunk_data[c], pars->n_ind, 3);
#endif
		total_elems_read += elems_read;
	}

#ifdef _USE_BGZF
	// Update index for next chunk
	if( chunk+1 != pars->n_chunks && pars->chunks_voffset[chunk+1] == 0 )
		pars->chunks_voffset[chunk+1] = bgzf_tell(pars->in_glf_fh);
#endif

	return( total_elems_read/(pars->n_ind * 3) );
}
Exemplo n.º 6
0
// Reads one record from the BCF file at the given BGZF offset.
//
// offset   - position passed to bgzf_seek().
// l_shared - receives the first 32-bit length field of the record.
// l_indiv  - receives the second 32-bit length field of the record.
// line     - resized to l_shared + l_indiv and filled with the record body.
//
// Returns the body length (l_shared + l_indiv) on success and -1 when the
// seek or any of the reads fails. Previously a failure was only logged and
// execution continued, so the buffer could be sized from length fields that
// had never been filled in.
int SingleChromosomeBCFIndex::readLine(int64_t offset,
                                       uint32_t* l_shared,
                                       uint32_t* l_indiv,
                                       std::vector<char>* line) {
  if (bgzf_seek(fBcfFile_, offset, SEEK_SET)) {
    REprintf("seek error!\n");
    return -1;
  }

  if (4 != bgzf_read(fBcfFile_, l_shared, sizeof(uint32_t)) ||
      4 != bgzf_read(fBcfFile_, l_indiv, sizeof(uint32_t))) {
    REprintf("readLine error!\n");
    return -1;
  }

  const uint32_t totalLen =  *l_shared + *l_indiv;
  line->resize(totalLen);
  // Compare as signed 64-bit values so a negative (error) result from
  // bgzf_read cannot be mistaken for a large unsigned length.
  const int64_t got = bgzf_read(fBcfFile_, line->data(), totalLen);
  if (got != static_cast<int64_t>(totalLen)) {
    REprintf("readLine bgzf_read error!\n");
    return -1;
  }

  return totalLen;
}
Exemplo n.º 7
0
// Loads the per-site filter arrays for chromosome `chr` into `fl`.
// `hint` is a suggested minimum size for the buffers.
//
// When `chr` has no entry in fl->offs a warning is printed, the buffers are
// freed and reset (curLen becomes 0) and the function returns. Otherwise the
// file is positioned at the stored offset, fl->keeps is filled, and, when
// fl->hasMajMin is 1, fl->major and fl->minor are filled as well. Buffers
// only grow: the working size is the largest of the current size, the hint
// and the stored length for this chromosome.
void filt_readSites(filt*fl,char *chr,size_t hint) {
  assert(fl!=NULL);

  std::map<char*,asdf_dats,ltstr> ::iterator entry = fl->offs.find(chr);
  if(entry==fl->offs.end()){
    fprintf(stderr,"\n\t-> Potential problem: The filereading has reached a chromsome: \'%s\', which is not included in your \'-sites\' file.\n\t-> Please consider limiting your analysis to the chromsomes of interest \n",chr);
    fprintf(stderr,"\t-> see \'http://www.popgen.dk/angsd/index.php/Sites\' for more information\n");
    fprintf(stderr,"\t-> Program will continue reading this chromosome... \n");
    free(fl->keeps);
    free(fl->minor);
    free(fl->major);
    fl->keeps=fl->minor=fl->major=NULL;
    fl->curLen =0;
    return;
  }

  bgzf_seek(fl->bg,entry->second.offs,SEEK_SET);

  // Working size: never shrink, honour the hint, fit this chromosome.
  size_t wanted = std::max(fl->curLen,hint);
  wanted = std::max(wanted,entry->second.len);
  const bool mustGrow = wanted>fl->curLen;

  if(mustGrow)
    fl->keeps=(char*) realloc(fl->keeps,wanted);
  // keeps is cleared on every call, grown or not.
  memset(fl->keeps,0,wanted);
  bgzf_read(fl->bg,fl->keeps,entry->second.len);

  if(fl->hasMajMin==1){
    // major/minor are only cleared when they were just regrown.
    if(mustGrow) {
      fl->major = (char*) realloc(fl->major,wanted);
      fl->minor = (char*) realloc(fl->minor,wanted);
      memset(fl->major,0,wanted);
      memset(fl->minor,0,wanted);
    }
    bgzf_read(fl->bg,fl->major,entry->second.len);
    bgzf_read(fl->bg,fl->minor,entry->second.len);
  }

  fl->curNam=chr;
  fl->curLen = wanted;
}
Exemplo n.º 8
0
/*
 * Build the on-disk index for a FASTQ file.
 *
 * fastq_path   - BGZF-readable FASTQ file to index.
 * index_prefix - prefix for the two output files: "<prefix>.hsh" receives
 *                the hash function, "<prefix>.lup" is handed to
 *                create_index() as the lookup-table path.
 *
 * Returns IFQ_OK on success, otherwise the code of the first step that
 * failed: IFQ_BAD_FASTQ (input cannot be opened), IFQ_BAD_PREFIX (output
 * path unavailable or hash file cannot be opened), IFQ_BAD_HASH (adapter or
 * hash construction failed), IFQ_BAD_INDEX (create_index() failed).
 *
 * Every resource starts out NULL and is released in one cleanup section
 * that only touches what was actually acquired. The previous chain of
 * labels ran the cmph teardown on uninitialised pointers when the FASTQ
 * file could not be opened and leaked the path strings, the hash file, the
 * config and the adapter on the other failure paths.
 */
ifq_codes_t ifq_create_index(char *fastq_path, char *index_prefix)
{
    ifq_codes_t ret = IFQ_OK;
    char *hash_path = concatenate( index_prefix, ".hsh" );
    char *seek_path = concatenate( index_prefix, ".lup" );
    BGZF *fastq_file = NULL;
    FILE *hash_file = NULL;
    cmph_io_adapter_t *source = NULL;
    cmph_config_t *config = NULL;
    cmph_t *hash = NULL;

    if( hash_path == NULL || seek_path == NULL )
    {
        ret = IFQ_BAD_PREFIX;
        goto cleanup;
    }

    /* Open input and output files */
    fastq_file = bgzf_open( fastq_path, "r" );
    if( fastq_file == NULL )
    {
        ret = IFQ_BAD_FASTQ;
        goto cleanup;
    }

    hash_file = fopen( hash_path, "w" );
    if( hash_file == NULL )
    {
        ret = IFQ_BAD_PREFIX;
        goto cleanup;
    }

    /* Create hash function */
    source = cmph_io_fastq_adapter( fastq_file );
    if( source == NULL )
    {
        ret = IFQ_BAD_HASH;
        goto cleanup;
    }

    config = cmph_config_new( source );
    cmph_config_set_algo( config, CMPH_CHD );
    cmph_config_set_mphf_fd( config, hash_file );
    hash = cmph_new( config );
    if( hash == NULL )
    {
        ret = IFQ_BAD_HASH;
        goto cleanup;
    }

    /* Create the file index using the hash */
    bgzf_seek( fastq_file, 0, SEEK_SET );
    if( create_index( fastq_file, hash, seek_path ) != 1 )
    {
        ret = IFQ_BAD_INDEX;
    }

cleanup:
    if( config != NULL )
    {
        cmph_config_destroy( config );
    }
    if( hash != NULL )
    {
        /* As before, the hash is dumped whenever it was built, even if
         * create_index() failed afterwards. */
        cmph_dump( hash, hash_file );
        cmph_destroy( hash );
    }
    free( source );
    if( hash_file != NULL )
    {
        fclose( hash_file );
    }
    if( fastq_file != NULL )
    {
        bgzf_close( fastq_file );
    }
    free( hash_path );
    free( seek_path );

    return ret;
}
Exemplo n.º 9
0
// Computes window statistics for a binary/index file pair and writes them
// to "<outnames><RES>".
//
// argv[0] is the base name; BIN and IDX suffixes are appended to locate the
// data and index files. Options are "flag value" pairs:
//   -r chrName   restrict to one chromosome (seeks via the index)
//   -outnames    output prefix (defaults to the base name)
//   -win/-step   window and step size; if either is 0 both are reset to 0
//   -nChr        required, must be non-zero
//   -type        passed through to do_stat_main
//
// Returns 0. Usage and lookup problems terminate the process via exit(0),
// matching the rest of this file.
int do_stat(int argc, char**argv){
  if(argc==0){
    fprintf(stderr,"do_stat FILE -win -step -nChr [-r chrName -type [0,1,2]]\n");
    exit(0);
  }
  char *base = *argv;
  char* outnames_bin = append(base,BIN);
  char* outnames_idx = append(base,IDX);
  fprintf(stderr,"\tAssuming binfile:%s and indexfile:%s\n",outnames_bin,outnames_idx);

  myMap mm = getMap(outnames_idx);
  writemap(stderr,mm);
  BGZF *fp = bgzf_open(outnames_bin,"r");
  if(fp==NULL){
    fprintf(stderr,"\tProblem opening file: %s\n",outnames_bin);
    exit(0);
  }

  --argc;++argv;
  char *chr=NULL;
  char *outnames = NULL;
  int nChr =0;
  int win =0;
  int step =0;
  int type =0;

  for(int argP=0;argP<argc;argP+=2){
    char *opt = argv[argP];
    const bool known =
      strcmp("-r",opt)==0 || strcmp("-outnames",opt)==0 ||
      strcmp("-step",opt)==0 || strcmp("-win",opt)==0 ||
      strcmp("-nChr",opt)==0 || strcmp("-type",opt)==0;
    if(!known){
      fprintf(stderr,"Unknown argument:%s\n",opt);
      exit(0);
    }
    // Every flag needs a value; the old check (argP==argc) could never fire
    // inside the loop, so a trailing flag handed argv[argc] to atoi().
    if(argP+1>=argc){
      fprintf(stderr,"incomplete arguments list\n");
      exit(0);
    }
    char *val = argv[argP+1];
    if(strcmp("-r",opt)==0)
      chr = val;
    else if(strcmp("-outnames",opt)==0)
      outnames = val;
    else if(strcmp("-step",opt)==0)
      step = atoi(val);
    else if(strcmp("-win",opt)==0)
      win = atoi(val);
    else if(strcmp("-nChr",opt)==0)
      nChr = atoi(val);
    else if(strcmp("-type",opt)==0)
      type = atoi(val);
  }

  // Passing NULL to %s is undefined; spell the placeholder out explicitly.
  fprintf(stderr,"\t -r=%s outnames=%s step: %d win: %d nChr:%d\n",chr?chr:"(null)",outnames?outnames:"(null)",step,win,nChr);
  if(nChr==0){
    fprintf(stderr,"nChr must be different from zero\n");
    exit(0);
  }
  if(win==0||step==0){
    fprintf(stderr,"\tWinsize equals zero or step size equals zero. Will use entire chromosome as window\n");
    win=step=0;
  }

  if(chr!=NULL){
    myMap::iterator it = mm.find(chr);
    if(it==mm.end()){
      fprintf(stderr,"\tProblem finding chr: %s in index\n",chr);
      exit(0);
    }
    datum d = it->second;
    bgzf_seek(fp,d.fpos,SEEK_SET);
  }
  if(outnames==NULL)
    outnames = base;

  char *resname = append(outnames,RES);
  FILE *fpres = fopen(resname,"w");
  if(fpres==NULL){
    fprintf(stderr,"\tProblem opening file: %s\n",resname);
    exit(0);
  }
  //fprintf(fpres,"## thetaStat VERSION: %s build:(%s,%s)\n",VERSION,__DATE__,__TIME__);
  fprintf(fpres,"#(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinStop)\t");
  fprintf(fpres,"Chr\tWinCenter\t");
  fprintf(fpres,"tW\ttP\ttF\ttH\ttL\t");
  fprintf(fpres,"Tajima\tfuf\tfud\tfayh\tzeng\tnSites\n");
  while(1){
    perChr pc = getPerChr(fp);
    if(pc.nSites==0)
      break;
    fprintf(stderr,"\tpc.chr=%s pc.nSites=%zu firstpos=%d lastpos=%d\n",pc.chr,pc.nSites,pc.posi[0],pc.posi[pc.nSites-1]);
    kstring_t str = do_stat_main(pc,step,win,nChr,type);
    fwrite(str.s,1,str.l,fpres);//should clean up str, doesn't matter for this program;
    fflush(fpres);
    // Release the record before leaving the loop as well, so the
    // single-chromosome path no longer skips the cleanup.
    dalloc(pc);
    if(chr!=NULL)
      break;
  }
  fclose(fpres);
  fprintf(stderr,"\tDumping file: \"%s\"\n",resname);
  return 0;
}
Exemplo n.º 10
0
// Prints per-site values for every population pair, one line per site:
// "chr<TAB>pos+1" followed by a pair of values per population pair.
//
// argv[0] is the index name handed to perfst_init(); the remaining
// arguments go to getArgs(). pars->chooseChr limits output to one
// chromosome; pars->start / pars->stop (-1 meaning unset) limit positions.
//
// Returns 0 on completion and 1 when the index cannot be loaded.
//
// Fixes over the previous version:
//  - perfst_init() can return NULL; that is now checked.
//  - a chosen chromosome with 0 sites used to `continue`, which re-ran the
//    same lookup indefinitely; it now ends the loop.
//  - the scans for the first/last site are bounded by the site count.
//  - the debug line no longer reads ppos[-1] when the range is empty at 0.
int fst_print(int argc,char **argv){

  char *bname = *argv;
  fprintf(stderr,"\t-> Assuming idxname:%s\n",bname);
  perfst *pf = perfst_init(bname);
  if(pf==NULL){
    fprintf(stderr,"\t-> Problem reading index: %s\n",bname);
    return 1;
  }
  writefst_header(stderr,pf);
  args *pars = getArgs(--argc,++argv);
  int *ppos = NULL;
  // Number of population pairs; it does not change while we iterate.
  const int nPairs = choose(pf->names.size(),2);
  fprintf(stderr,"choose:%d \n",nPairs);
  double **ares = new double*[nPairs];
  double **bres = new double*[nPairs];
  for(myFstMap::iterator it=pf->mm.begin();it!=pf->mm.end();++it){
    if(pars->chooseChr!=NULL){
      it = pf->mm.find(pars->chooseChr);
      if(it==pf->mm.end()){
	fprintf(stderr,"Problem finding chr: %s\n",pars->chooseChr);
	break;
      }
    }
    if(it->second.nSites==0){
      if(pars->chooseChr!=NULL)
	break;
      continue;
    }
    const int nSites = (int) it->second.nSites;
    bgzf_seek(pf->fp,it->second.off,SEEK_SET);
    ppos = new int[nSites];

    bgzf_read(pf->fp,ppos,sizeof(int)*it->second.nSites);
    for(int i=0;i<nPairs;i++){
      ares[i] = new double[nSites];
      bres[i] = new double[nSites];
      bgzf_read(pf->fp,ares[i],sizeof(double)*it->second.nSites);
      bgzf_read(pf->fp,bres[i],sizeof(double)*it->second.nSites);
    }

    // First site at or after pars->start.
    int first=0;
    if(pars->start!=-1)
      while(first<nSites&&ppos[first]<pars->start)
	first++;

    // One past the last site before pars->stop.
    int last=nSites;
    if(pars->stop!=-1&&pars->stop<=ppos[last-1]){
      last=first;
      while(last<nSites&&ppos[last]<pars->stop)
	last++;
    }

    fprintf(stderr,"pars->stop:%d ppos:%d first:%d last:%d\n",pars->stop,last>0?ppos[last-1]:-1,first,last);

    for(int s=first;s<last;s++){
      fprintf(stdout,"%s\t%d",it->first,ppos[s]+1);
      for(int i=0;i<nPairs;i++)
	fprintf(stdout,"\t%f\t%f",ares[i][s],bres[i][s]);
      fprintf(stdout,"\n");
    }
    for(int i=0;i<nPairs;i++){
      delete [] ares[i];
      delete [] bres[i];
    }

    delete [] ppos;

    if(pars->chooseChr!=NULL)
      break;
  }
  delete [] ares;
  delete [] bres;
  destroy_args(pars);
  perfst_destroy(pf);
  return 0;
}
Exemplo n.º 11
0
// Prints two summary values per population pair: the mean of the per-site
// ratios a/b over sites with b != 0 ("unweighted") and sum(a)/sum(b)
// ("weighted"). With exactly three pairs the weighted values are also
// passed to calcpbs() and the results are printed.
//
// argv[0] is the index name handed to perfst_init(); the remaining
// arguments go to getArgs(). pars->chooseChr limits the data to one
// chromosome; pars->start / pars->stop (-1 meaning unset) limit positions.
//
// Returns 0 on completion and 1 when the index cannot be loaded.
//
// Fixes over the previous version:
//  - perfst_init() can return NULL; that is now checked.
//  - a chosen chromosome with 0 sites used to `continue`, which re-ran the
//    same lookup indefinitely; it now ends the loop.
//  - the scans for the first/last site are bounded by the site count.
int fst_stat(int argc,char **argv){

  char *bname = *argv;
  fprintf(stderr,"\t-> Assuming idxname:%s\n",bname);
  perfst *pf = perfst_init(bname);
  if(pf==NULL){
    fprintf(stderr,"\t-> Problem reading index: %s\n",bname);
    return 1;
  }
  args *pars = getArgs(--argc,++argv);
  int *ppos = NULL;
  int chs = choose(pf->names.size(),2);
  // fprintf(stderr,"choose:%d \n",chs);
  double **ares = new double*[chs];
  double **bres = new double*[chs];
  double unweight[chs];
  double wa[chs];
  double wb[chs];
  size_t nObs[chs];
  for(int i=0;i<chs;i++){
    unweight[i] = wa[i] = wb[i] =0.0;
    nObs[i] = 0;
  }
  for(myFstMap::iterator it=pf->mm.begin();it!=pf->mm.end();++it){
    if(pars->chooseChr!=NULL){
      it = pf->mm.find(pars->chooseChr);
      if(it==pf->mm.end()){
	fprintf(stderr,"Problem finding chr: %s\n",pars->chooseChr);
	break;
      }
    }
    if(it->second.nSites==0){
      if(pars->chooseChr!=NULL)
	break;
      continue;
    }
    const int nSites = (int) it->second.nSites;
    bgzf_seek(pf->fp,it->second.off,SEEK_SET);
    ppos = new int[nSites];

    bgzf_read(pf->fp,ppos,sizeof(int)*it->second.nSites);
    for(int i=0;i<chs;i++){
      ares[i] = new double[nSites];
      bres[i] = new double[nSites];
      bgzf_read(pf->fp,ares[i],sizeof(double)*it->second.nSites);
      bgzf_read(pf->fp,bres[i],sizeof(double)*it->second.nSites);
    }

    // First site at or after pars->start.
    int first=0;
    if(pars->start!=-1)
      while(first<nSites&&ppos[first]<pars->start)
	first++;

    // One past the last site before pars->stop.
    int last=nSites;
    if(pars->stop!=-1&&pars->stop<=ppos[last-1]){
      last=first;
      while(last<nSites&&ppos[last]<pars->stop)
	last++;
    }

    //  fprintf(stderr,"pars->stop:%d ppos:%d first:%d last:%d\n",pars->stop,ppos[last-1],first,last);

    for(int s=first;s<last;s++){
#if 0
      fprintf(stdout,"%s\t%d",it->first,ppos[s]+1);
      for(int i=0;i<choose(pf->names.size(),2);i++)
	fprintf(stdout,"\t%f\t%f",ares[i][s],bres[i][s]);
      fprintf(stdout,"\n");
#endif
      for(int i=0;i<chs;i++){
	// Sites with a zero denominator are left out of the unweighted mean.
	if(bres[i][s]!=0){
	  unweight[i] += ares[i][s]/bres[i][s];
	  nObs[i]++;
	}
	wa[i] += ares[i][s];
	wb[i] += bres[i][s];
      }
    }
    for(int i=0;i<chs;i++){
      delete [] ares[i];
      delete [] bres[i];
    }

    delete [] ppos;

    if(pars->chooseChr!=NULL)
      break;
  }
  double fstUW[chs];
  double fstW[chs];
  for(int i=0;i<chs;i++){
    fstUW[i] = unweight[i]/(1.0*nObs[i]);
    fstW[i] = wa[i]/wb[i];
    fprintf(stderr,"\t-> FST.Unweight[nObs:%lu]:%f Fst.Weight:%f\n",nObs[i],fstUW[i],fstW[i]);
    fprintf(stdout,"%f %f\n",fstUW[i],fstW[i]);
  }
  if(chs==3){
    //if chs==3 then we have 3pops and we will also calculate pbs statistics
    calcpbs(fstW);//<- NOTE: the pbs values will replace the fstW values
    for(int i=0;i<3;i++)
      fprintf(stderr,"\t-> pbs.pop%d\t%f\n",i+1,fstW[i]);
  }
  delete [] ares;
  delete [] bres;
  destroy_args(pars);
  perfst_destroy(pf);
  return 0;
}
Exemplo n.º 12
0
// Sliding-window version of the pairwise summary: for each window of
// pars->win positions, advanced by pars->step, prints the window's index
// range, position range, bounds, chromosome and centre, followed by the
// unweighted and weighted value for every population pair. With exactly
// three pairs the weighted values are also passed to calcpbs() and printed.
//
// argv[0] is the index name handed to perfst_init(); the remaining
// arguments go to getArgs(). pars->type selects the first window start:
// 0 = first multiple of step above the start position, 1 = the start
// position itself, 2 = position 1.
//
// Returns 0 on completion and 1 when the index cannot be loaded.
//
// Fixes over the previous version:
//  - perfst_init() can return NULL; that is now checked.
//  - the scans for the window's first and last site are bounded by the site
//    count. Before, a window ending after the last position (the case the
//    "end of dataset" message reports) walked past the end of ppos.
//  - a window starting after the last site ends the chromosome instead of
//    indexing past the array.
//  - fstW is zeroed per window so calcpbs() never sees uninitialised values
//    when a window holds no sites.
// NOTE(review): pars->step == 0 still divides by zero for type 0 and never
// advances the window; presumably getArgs() rejects it - confirm.
int fst_stat2(int argc,char **argv){
  int pS=0,pE=0;//physical start,physical end
  int begI,endI;//position in array for pS, pE;

  char *bname = *argv;
  fprintf(stderr,"\t-> Assuming idxname:%s\n",bname);
  perfst *pf = perfst_init(bname);
  if(pf==NULL){
    fprintf(stderr,"\t-> Problem reading index: %s\n",bname);
    return 1;
  }
  args *pars = getArgs(--argc,++argv);
  fprintf(stderr,"win:%d step:%d\n",pars->win,pars->step);
  int *ppos = NULL;
  int chs = choose(pf->names.size(),2);
  // fprintf(stderr,"choose:%d \n",chs);
  double **ares = new double*[chs];
  double **bres = new double*[chs];
  double unweight[chs];
  double wa[chs];
  double wb[chs];
  double fstW[chs];
  size_t nObs =0;

  for(myFstMap::iterator it=pf->mm.begin();it!=pf->mm.end();++it){
    if(pars->chooseChr!=NULL){
      it = pf->mm.find(pars->chooseChr);
      if(it==pf->mm.end()){
	fprintf(stderr,"Problem finding chr: %s\n",pars->chooseChr);
	break;
      }
    }
    fprintf(stderr,"nSites:%lu\n",it->second.nSites);
    if(it->second.nSites==0){
      if(pars->chooseChr!=NULL)
	break;
      continue;
    }
    const int nSites = (int) it->second.nSites;
    bgzf_seek(pf->fp,it->second.off,SEEK_SET);
    ppos = new int[nSites];

    bgzf_read(pf->fp,ppos,sizeof(int)*it->second.nSites);
    // Positions are incremented by one right after loading.
    for(int i=0;i<nSites;i++)
      ppos[i]++;
    for(int i=0;i<chs;i++){
      ares[i] = new double[nSites];
      bres[i] = new double[nSites];
      bgzf_read(pf->fp,ares[i],sizeof(double)*it->second.nSites);
      bgzf_read(pf->fp,bres[i],sizeof(double)*it->second.nSites);
    }

    if(pars->type==0)
      pS = ((pars->start!=-1?pars->start:ppos[0])/pars->step)*pars->step +pars->step;
    else if(pars->type==1)
      pS = (pars->start!=-1?pars->start:ppos[0]);
    else if(pars->type==2)
      pS = 1;
    pE = pS+pars->win;

    // Upper limit for window ends on this chromosome.
    const int lastPos = (pars->stop!=-1?pars->stop:ppos[nSites-1]);
    if(pE>lastPos){
      fprintf(stderr,"end of dataset is before end of window: end of window:%d last position in chr:%d\n",pE,ppos[nSites-1]);
    }

    begI=0;
    while(begI<nSites&&ppos[begI]<pS) begI++;

    endI=begI;
    while(endI<nSites&&ppos[endI]<pE) endI++;

    //fprintf(stderr,"begI:%d endI:%d\n",begI,endI);

    // begI==nSites means no site lies at or after the window start.
    while(begI<nSites){
      for(int i=0;i<chs;i++)
	unweight[i] = wa[i] = wb[i] = fstW[i] = 0.0;
      nObs=0;
      // endI can only be 0 for an empty window at the very start; fall back
      // to ppos[0] there instead of reading ppos[-1].
      fprintf(stdout,"(%d,%d)(%d,%d)(%d,%d)\t%s\t%d",begI,endI-1,ppos[begI],endI>0?ppos[endI-1]:ppos[0],pS,pE,it->first,pS+(pE-pS)/2);
      for(int s=begI;s<endI;s++){
#if 0
	fprintf(stdout,"%s\t%d",it->first,ppos[s]+1);
	for(int i=0;i<choose(pf->names.size(),2);i++)
	  fprintf(stdout,"\t%f\t%f",ares[i][s],bres[i][s]);
	fprintf(stdout,"\n");
#endif
	for(int i=0;i<chs;i++){
	  unweight[i] += ares[i][s]/bres[i][s];
	  wa[i] += ares[i][s];
	  wb[i] += bres[i][s];
	}
	nObs++;
      }
      for(int i=0;nObs>0&&i<chs;i++){
	fstW[i] = wa[i]/wb[i];
	fprintf(stdout,"\t%f\t%f",unweight[i]/(1.0*nObs),fstW[i]);
      }
      if(chs==3){
	//if chs==3 then we have 3pops and we will also calculate pbs statistics
	calcpbs(fstW);//<- NOTE: the pbs values will replace the fstW values
	for(int i=0;i<3;i++)
	  fprintf(stdout,"\t%f",fstW[i]);
      }
      fprintf(stdout,"\n");

      pS += pars->step;
      pE =pS+pars->win;
      if(pE>lastPos)
	break;

      while(begI<nSites&&ppos[begI]<pS) begI++;
      while(endI<nSites&&ppos[endI]<pE) endI++;
    }
    for(int i=0;i<chs;i++){
      delete [] ares[i];
      delete [] bres[i];
    }

    delete [] ppos;

    if(pars->chooseChr!=NULL)
      break;
  }

  delete [] ares;
  delete [] bres;
  destroy_args(pars);
  perfst_destroy(pf);
  return 0;
}
Exemplo n.º 13
0
/*
 * Stress test: repeatedly opens a BGZF file and issues a random mix of
 * seeks (start / middle / end), reads, short sleeps and a switch to
 * multi-threaded mode, aborting when a read returns an unexpected length.
 *
 * argv[1] is the input file. A first pass reads through the file once to
 * record a position roughly 100 * 65536 uncompressed bytes in and the
 * position at end of data.
 *
 * Both bgzf_open() calls are now checked; previously a missing or
 * unreadable input led straight to a NULL handle being used.
 */
int main(int argc, char *argv[]) {
    if (argc <= 1) {
        fprintf(stderr, "Usage: thrash_threads4 input.bam\n");
        exit(1);
    }

    // Find a valid seek location ~64M into the file
    int i;
    ssize_t got;
    BGZF *fpin  = bgzf_open(argv[1], "r");
    if (fpin == NULL) {
        fprintf(stderr, "Failed to open %s\n", argv[1]);
        exit(1);
    }
    uint64_t upos = 0, uend = 0;
    char buf[100000];
    for (i = 0; i < 100; i++) {
        if ((got = bgzf_read(fpin, buf, 65536)) < 0)
            abort();
        upos += got;
    }
    int64_t pos = bgzf_tell(fpin);
    while ((got = bgzf_read(fpin, buf, 65536)) > 0) {
        uend += got;
    }
    if (got < 0) abort();
    int64_t end = bgzf_tell(fpin);
    bgzf_close(fpin);

    // Ensure input is big enough to avoid case 3,4 below going off the end
    // of the file
    if (uend < upos + 10000000) {
        fprintf(stderr, "Please supply a bigger input file\n");
        exit(1);
    }

#define N 1000

    // Spam random seeks & reads
    for (i = 0; i < 1000; i++) {
        printf("i=%d\t", i);
        fpin  = bgzf_open(argv[1], "r");
        if (fpin == NULL) {
            fprintf(stderr, "Failed to open %s\n", argv[1]);
            abort();
        }
        int j, eof = 0, mt = 0;
        for (j = 0; j < 80; j++) {
            int n = rand() % 7;
            putchar('0'+n); fflush(stdout);
            switch (n) {
            case 0: // start
                if (bgzf_seek(fpin, 0LL, SEEK_SET) < 0) puts("!");//abort();
                eof = 0;
                break;
            case 1: // mid
                if (bgzf_seek(fpin, pos, SEEK_SET) < 0) puts("!");//abort();
                eof = 0;
                break;
            case 2: // eof
                if (bgzf_seek(fpin, end, SEEK_SET) < 0) puts("!");//abort();
                eof = 1;
                break;
            case 3: case 4: {
                // Expect a full read, or 0 bytes right after seeking to the end.
                int l = rand()%(n==3?100000:100);
                if (bgzf_read(fpin, buf, l) != l*(1-eof)) abort();
                break;
            }
            case 5:
                usleep(N);
                break;
            case 6:
                // Switch to multi-threaded mode at most once per handle.
                if (!mt)
                    bgzf_mt(fpin, 8, 256);
                mt = 1;
                break;
            }
        }
        printf("\n");
        if (bgzf_close(fpin))
            abort();
    }

    return 0;
}
Exemplo n.º 14
0
/*
 * bgzip entry point: compresses a file (or stdin) to BGZF, or decompresses
 * a ".gz" file (or stdin), optionally only a region of it.
 *
 * Options: -c write to stdout, -d decompress, -f overwrite existing output,
 * -b start offset, -s size of the region, -h usage.
 *
 * When a file argument is given and -c is not, the input file is removed
 * after a successful run.
 *
 * Fixes over the previous version:
 *  - a read() error while compressing used to end the loop silently and
 *    the source file was then unlinked; it is now reported and kept.
 *  - the ".gz" suffix test no longer reads before the start of names
 *    shorter than three characters.
 *  - the result of write_open() is checked when decompressing.
 *  - unlink() after decompression only runs when a file argument exists.
 *  - output written with write() is checked and short writes are resumed.
 *  - the output name buffer is freed on the write_open() failure path.
 *  - allocation and bgzf_fdopen() results are checked.
 */
int main(int argc, char **argv)
{
	int c, compress, pstdout, is_forced;
	BGZF *fp;
	void *buffer;
	long start, end, size;

	compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0;
	while((c  = getopt(argc, argv, "cdhfb:s:")) >= 0){
		switch(c){
		case 'h': return bgzip_main_usage();
		case 'd': compress = 0; break;
		case 'c': pstdout = 1; break;
		case 'b': start = atol(optarg); break;
		case 's': size = atol(optarg); break;
		case 'f': is_forced = 1; break;
		}
	}
	if (size >= 0) end = start + size;
	if (end >= 0 && end < start) {
		fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end);
		return 1;
	}
	if (compress == 1) {
		struct stat sbuf;
		int f_src = fileno(stdin);
		int f_dst = fileno(stdout);

		if ( argc>optind )
		{
			if ( stat(argv[optind],&sbuf)<0 )
			{
				fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
				return 1;
			}

			if ((f_src = open(argv[optind], O_RDONLY)) < 0) {
				fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
				return 1;
			}

			if (pstdout)
				f_dst = fileno(stdout);
			else
			{
				char *name = (char *) malloc(strlen(argv[optind]) + 5);
				if (name == NULL) {
					fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
					return 1;
				}
				strcpy(name, argv[optind]);
				strcat(name, ".gz");
				f_dst = write_open(name, is_forced);
				free(name);
				if (f_dst < 0) return 1;
			}
		}
		else if (!pstdout && isatty(fileno((FILE *)stdout)) )
			return bgzip_main_usage();

		fp = bgzf_fdopen(f_dst, "w");
		if (fp == NULL) {
			fprintf(stderr, "[bgzip] Could not open output stream: %s\n", strerror(errno));
			return 1;
		}
		buffer = malloc(WINDOW_SIZE);
		if (buffer == NULL) {
			fprintf(stderr, "[bgzip] Could not allocate buffer: %s\n", strerror(errno));
			return 1;
		}
		while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0)
			if (bgzf_write(fp, buffer, c) < 0) fail(fp);
		if (c < 0) {
			/* Input could not be read completely: keep the source file. */
			fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argc > optind ? argv[optind] : "stdin");
			free(buffer);
			return 1;
		}
		// f_dst will be closed here
		if (bgzf_close(fp) < 0) fail(fp);
		if (argc > optind && !pstdout) unlink(argv[optind]);
		free(buffer);
		close(f_src);
		return 0;
	} else {
		struct stat sbuf;
		int f_dst;

		if ( argc>optind )
		{
			if ( stat(argv[optind],&sbuf)<0 )
			{
				fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
				return 1;
			}
			char *name;
			int len = strlen(argv[optind]);
			/* Names shorter than the suffix cannot carry it. */
			if ( len < 3 || strcmp(argv[optind]+len-3,".gz") )
			{
				fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]);
				return 1;
			}
			fp = bgzf_open(argv[optind], "r");
			if (fp == NULL) {
				fprintf(stderr, "[bgzip] Could not open file: %s\n", argv[optind]);
				return 1;
			}

			if (pstdout) {
				f_dst = fileno(stdout);
			}
			else {
				name = strdup(argv[optind]);
				if (name == NULL) {
					fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
					return 1;
				}
				name[strlen(name) - 3] = '\0';
				f_dst = write_open(name, is_forced);
				free(name);
				if (f_dst < 0) return 1;
			}
		}
		else if (!pstdout && isatty(fileno((FILE *)stdin)) )
			return bgzip_main_usage();
		else
		{
			f_dst = fileno(stdout);
			fp = bgzf_fdopen(fileno(stdin), "r");
			if (fp == NULL) {
				fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno));
				return 1;
			}
		}
		buffer = malloc(WINDOW_SIZE);
		if (buffer == NULL) {
			fprintf(stderr, "[bgzip] Could not allocate buffer: %s\n", strerror(errno));
			return 1;
		}
		if (bgzf_seek(fp, start, SEEK_SET) < 0) fail(fp);
		while (1) {
			if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE);
			else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
			if (c == 0) break;
			if (c < 0) fail(fp);
			start += c;
			{
				/* write() may accept fewer bytes than asked; resume until done. */
				const char *out = (const char *) buffer;
				int left = c;
				while (left > 0) {
					ssize_t w = write(f_dst, out, left);
					if (w < 0) {
						if (errno == EINTR) continue;
						fprintf(stderr, "[bgzip] Could not write output: %s\n", strerror(errno));
						free(buffer);
						return 1;
					}
					out += w;
					left -= (int) w;
				}
			}
			if (end >= 0 && start >= end) break;
		}
		free(buffer);
		if (bgzf_close(fp) < 0) fail(fp);
		/* Only remove the input when there actually was an input file. */
		if (argc > optind && !pstdout) unlink(argv[optind]);
		return 0;
	}
}
Exemplo n.º 15
0
// Reads `count` items of `size` bytes from `fp` into `dst`; on a short read
// reports the problem for `fname` and terminates via exit(0).
static void perfst_read_or_die(void *dst,size_t size,size_t count,FILE *fp,const char *fname){
  if(fread(dst,size,count,fp)!=count){
    fprintf(stderr,"\t-> Problem reading data from file: \'%s\'\n",fname);
    exit(0);
  }
}

// Loads an index file and opens the matching data file.
//
// fname - path of the index file. The data file name is derived from it by
//         dropping the last three characters and appending "gz".
//
// The index is read as: 8 leading bytes (skipped), a count of names, the
// names themselves (length-prefixed), then per-chromosome entries until end
// of file, each a length-prefixed name, a site count and an offset.
//
// Returns the populated perfst with its data file opened and positioned at
// byte 8, or NULL when the two files report different versions. Unreadable
// or malformed input terminates the process via exit(0).
//
// Fixes over the previous version:
//  - reads no longer sit inside assert(), so they still happen (and are
//    still checked) when NDEBUG is defined.
//  - names shorter than three characters are rejected instead of
//    underflowing the length passed to strncpy().
//  - the temporary name buffers are freed on the version-mismatch path.
//  - the result of bgzf_open() is checked.
perfst * perfst_init(char *fname){
  perfst *ret = new perfst ;
  ret->nSites =0;
  if(!fexists(fname)){
    fprintf(stderr,"\t-> Problem opening file: \'%s\'\n",fname);
    exit(0);
  }
  FILE *fp = NULL;
  fp=fopen(fname,"r");
  if(fp==NULL){
    fprintf(stderr,"\t-> Problem opening file:%s\n",fname);
    exit(0);
  }
  // The first 8 bytes are consumed here; the version is read separately.
  char buf[8];
  perfst_read_or_die(buf,1,8,fp,fname);
  ret->version=fstversion(fname);
  //read names
  size_t nit=0;
  perfst_read_or_die(&nit,sizeof(size_t),1,fp,fname);
  //fprintf(stderr,"nit:%lu\n",nit);
  for(size_t i=0;i<nit;i++){
    size_t namLen;
    perfst_read_or_die(&namLen,sizeof(size_t),1,fp,fname);
    //fprintf(stderr,"clen:%lu\n",namLen);
    char *nam =(char*) calloc(namLen+1,1);
    perfst_read_or_die(nam,sizeof(char),namLen,fp,fname);
    ret->names.push_back(nam);
  }
#if 1
  // Per-chromosome entries run until no further length field can be read.
  size_t clen;
  while(fread(&clen,sizeof(size_t),1,fp)){
    char *chr = (char*)calloc(clen+1,1);
    perfst_read_or_die(chr,1,clen,fp,fname);
    dat d;
    if(1!=fread(&d.nSites,sizeof(size_t),1,fp)){
      fprintf(stderr,"[%s.%s():%d] Problem reading data: %s \n",__FILE__,__FUNCTION__,__LINE__,fname);
      exit(0);
    }
    //    exit(0);
    ret->nSites += d.nSites;
    if(1!=fread(&d.off,sizeof(int64_t),1,fp)){
      fprintf(stderr,"[%s->%s():%d] Problem reading data: %s \n",__FILE__,__FUNCTION__,__LINE__,fname);
      exit(0);
    }
    myFstMap::iterator it = ret->mm.find(chr);
    if(it==ret->mm.end())
      ret->mm[chr] =d ;
    else{
      fprintf(stderr,"Problem with chr: %s, key already exists\n",chr);
      exit(0);
    }
  }
#endif
  fclose(fp);

  // Derive the data file name: strip the last three characters, add "gz".
  const size_t fnameLen = strlen(fname);
  if(fnameLen<3){
    fprintf(stderr,"\t-> Problem opening file: \'%s\'\n",fname);
    exit(0);
  }
  char *tmp =(char*)calloc(fnameLen+100,1);//that should do it
  tmp=strncpy(tmp,fname,fnameLen-3);
  // fprintf(stderr,"tmp:%s\n",tmp);

  char *tmp2 = (char*)calloc(fnameLen+100,1);//that should do it
  snprintf(tmp2,fnameLen+100,"%sgz",tmp);
  fprintf(stderr,"\t-> Assuming .fst.gz file: %s\n",tmp2);
  const int dataVersion = fstversion(tmp2);
  if(ret->version!=dataVersion){
    fprintf(stderr,"\t-> Version mismatch: %d %d\n",ret->version,dataVersion);
    free(tmp);
    free(tmp2);
    return NULL;
  }
  ret->fp = bgzf_open(tmp2,"r");
  if(ret->fp==NULL){
    fprintf(stderr,"\t-> Problem opening file: \'%s\'\n",tmp2);
    exit(0);
  }
  bgzf_seek(ret->fp,8,SEEK_SET);

  free(tmp);
  free(tmp2);
  //  writefst_header(stderr,ret);
  return ret;
 }
Exemplo n.º 16
0
/**
 * Create single chromosome index file
 * the file content is a 2-column matrix of int64_t type
 * line1:  num_sample  num_marker
 * line2:  0           bgzf_offset_for_#CHROM_line
 * line3:  var_1_pos   bgzf_offset_for_var_1
 * ...
 *
 * Reads the already-open BCF handle from its start: checks the magic bytes,
 * loads and parses the text header, locates the "#CHROM" line to derive the
 * sample count and its BGZF offset, then walks the records, writing one
 * (position, offset) pair per record. The first line is written twice: once
 * as a placeholder and again at the end with the final marker count.
 *
 * @return 0 on success, -1 on any read, parse or open failure.
 *
 * Fixes over the previous version:
 *  - a failed header read now aborts instead of parsing a partial buffer.
 *  - the re-read up to "#CHROM" is checked.
 *  - trailing-character stripping stops at an empty string.
 *  - the diagnostic "last character" is captured before stripping, so it no
 *    longer indexes past the shortened string.
 *  - failure to open the index file is reported.
 *  - records shorter than 8 bytes end the scan instead of being read past
 *    their end.
 */
int SingleChromosomeBCFIndex::createIndex() {
  // const char* fn = bcfFile_.c_str();
  BGZF* fp = fBcfFile_;  // bgzf_open(fn, "rb");
  bgzf_seek(fp, 0, SEEK_SET);

  // check magic number
  char magic[5];
  if (5 != bgzf_read(fp, magic, 5)) {
    return -1; // exit(1);
  }
  if (!(magic[0] == 'B' && magic[1] == 'C' && magic[2] == 'F' &&
        magic[3] == 2 && (magic[4] == 1 || magic[4] == 2))) {
    return -1; // exit(1);
  }

  // read header
  uint32_t l_text;
  if (4 != bgzf_read(fp, &l_text, 4)) {
    return -1; // exit(1);
  }
  Rprintf("l_text = %d\n", l_text);

  std::string s;
  int64_t bgzf_offset_before_header = bgzf_tell(fp); // the beginning of header block
  s.resize(l_text);
  if (bgzf_read(fp, (void*)s.data(), l_text) != (int64_t) l_text) {
    REprintf( "Read failed!\n");
    return -1;
  }
  BCFHeader bcfHeader;
  if (bcfHeader.parseHeader(s,
                  &bcfHeader.header_contig_id,
                  &bcfHeader.header_id,
                  &bcfHeader.header_number,
                  &bcfHeader.header_type,
                  &bcfHeader.header_description)) {
    REprintf( "Parse header failed!\n");
    return -1; // exit(1);
  }

  // locate #CHROM line
  int64_t bgzf_offset_after_header = bgzf_tell(fp); // the end of header block
  size_t ptr_chrom_line = s.find("#CHROM"); // the index of "#CHROM", also the size between beginning of header to '#CHROM'
  if (ptr_chrom_line == std::string::npos) {
    REprintf( "Cannot find the \"#CHROM\" line!\n");
    return -1; // exit(1);
  }
  Rprintf("offset_header = %d\n", (int) ptr_chrom_line);

  // Re-read the header in two parts so bgzf_tell() can be sampled exactly
  // at the start of the "#CHROM" line.
  bgzf_seek(fp, bgzf_offset_before_header, SEEK_SET); // rewind fp to the beginning of header
  s.resize(ptr_chrom_line);
  int64_t before_chrom_size = bgzf_read(fp, (void*) s.data(), ptr_chrom_line);
  if (before_chrom_size != (int64_t) ptr_chrom_line) {
    REprintf( "Read failed!\n");
    return -1;
  }
  int64_t bgzf_offset_before_chrom = bgzf_tell(fp); // the offset to #CHROM
  s.resize(l_text - before_chrom_size);
  int64_t after_chrom_size = bgzf_read(fp, (void*) s.data(), l_text - before_chrom_size);
  // Remember the final byte as read, before any stripping below.
  const int last_char_read =
      (after_chrom_size > 0 && (size_t) after_chrom_size <= s.size())
          ? s[after_chrom_size - 1]
          : -1;
  // load sample names
  while (!s.empty() && (s.back() == '\n' || s.back() == '\0')) {
    s.resize(s.size() - 1);
  }
  stringTokenize(s, "\t", &bcfHeader.sample_names);
  const int64_t num_sample = (int)bcfHeader.sample_names.size() - 9; // vcf header has 9 columns CHROM...FORMAT before actual sample names
  Rprintf("sample size = %ld\n", (long) num_sample);
  Rprintf("last character is s[after_chrom_size-1] = %d\n", last_char_read); // should be 0, the null terminator character
  // quality check
  if (bgzf_offset_after_header != bgzf_tell(fp)) {
    REprintf( "Messed up bgzf header\n");
    return -1; // exit(1);
  }

  // create index file
  FILE* fIndex = fopen(indexFile_.c_str(), "wb");
  if (fIndex == NULL) {
    REprintf( "Cannot create index file!\n");
    return -1;
  }
  int64_t num_marker = 0;
  int64_t pos = 0;
  fwrite(&num_sample, sizeof(int64_t), 1, fIndex);
  fwrite(&num_marker, sizeof(int64_t), 1, fIndex);
  fwrite(&pos, sizeof(int64_t), 1, fIndex);
  fwrite(&bgzf_offset_before_chrom, sizeof(int64_t), 1, fIndex);

  uint32_t l_shared;
  uint32_t l_indiv;
  std::vector<char> data;
  int64_t offset;
  do {
    offset = bgzf_tell(fp);
    if (4 != bgzf_read(fp, &l_shared, sizeof(uint32_t))) {
      break; // REprintf( "Wrong read!\n"); exit(1);
    }
    if (4 != bgzf_read(fp, &l_indiv, sizeof(uint32_t))) {
      break; // REprintf( "Wrong read!\n"); exit(1);
    }
    const uint32_t record_len = l_shared + l_indiv;
    data.resize(record_len);
    if ((int64_t) record_len != (int64_t) bgzf_read(fp, data.data(), record_len * sizeof(char))) {
      break; // REprintf( "Wrong read!\n"); exit(1);
    }
    if (data.size() < 8) {
      break; // too short to hold the 4-byte field at offset 4
    }
    // The position is the 4-byte field at byte offset 4 of the record; only
    // the low 4 bytes of `pos` are overwritten, the rest stay 0.
    memcpy(&pos, data.data() + 4, 4);
    fwrite(&pos, sizeof(int64_t), 1, fIndex);
    fwrite(&offset, sizeof(int64_t), 1, fIndex);

    num_marker++;
    if (num_marker % 10000 == 0) {
      Rprintf("\rprocessed %ld markers", (long) num_marker);
    }
  } while (true);

  // Rewrite the first line now that the marker count is known.
  if (fseek(fIndex, 0, SEEK_SET)) {
    REprintf( "fseek failed\n!");
  }
  fwrite(&num_sample, sizeof(int64_t), 1, fIndex);
  fwrite(&num_marker, sizeof(int64_t), 1, fIndex);
  fclose(fIndex);
  Rprintf("Indexing finished with %ld samples and %ld markers\n", (long) num_sample, (long) num_marker);

  return 0;
}
Exemplo n.º 17
0
/*
 * Rewind callback: `data` is treated as the BGZF handle of the FASTQ file
 * and is repositioned at offset 0. The seek result is not inspected.
 */
void key_fastq_rewind(void *data)
{
    bgzf_seek( (BGZF *) data, 0, SEEK_SET );
}
Exemplo n.º 18
0
// Looks up a position in a memory-mapped index of (pos, offset) records and
// prints the first 50 characters of the line found at that offset in a BGZF
// file.
//
// argv[1] = BGZF file, argv[2] = index file, argv[3] = position to find.
//
// Returns 0 on success, 1 on bad usage, an unusable index, a position that
// is not in the index, or a file that cannot be opened.
//
// Fixes over the previous version:
//  - argv is only read after checking argc.
//  - an index smaller than one record is rejected instead of underflowing
//    the record count.
//  - a position that is not found stops the program instead of seeking to
//    offset -1.
//  - the result of bgzf_open() is checked.
//  - the line buffer owned by the kstring is freed.
// NOTE(review): the record count is (file size / 16 - 1) while the search
// starts at the first record of the mapping; if the index begins with a
// header record this range includes the header and omits the last entry -
// confirm against the index writer.
int main(int argc, char** argv) {
  if (argc < 4) {
    fprintf(stderr, "Usage: %s bgzf_file index_file position\n", argv[0]);
    return 1;
  }
  const char* fBG = argv[1];
  const char* fIndex = argv[2];
  int64_t pos = strtol(argv[3], NULL, 0);
  // const int Nrecord = 10;

  // read everything
  MmapFile mmapFile;
  mmapFile.open(fIndex);
  if (mmapFile.getFileSize() < 16) {
    fprintf(stderr, "Index file is too small!\n");
    return 1;
  }
  size_t Nrecord = mmapFile.getFileSize() / 16 - 1;
  Record* r = (Record*)mmapFile.data;

  // binary search for file position
  int64_t offset = -1;
  Record query;
  query.pos = pos;
  // Comparator comparator;
  Record* lb = std::lower_bound(r, r + Nrecord, query,
                                comparator);  // r[lb].pos >= query.pos
  Record* ub = std::upper_bound(lb, r + Nrecord, query,
                                comparator);  // r[ub].pos > query.pos
  if (lb != ub) {
    // (TODO) only store one virtual offset for now.
    printf("%ld %ld\n", lb->pos, lb->offset);
    offset = lb->offset;
  }

  if (offset < 0) {
    fprintf(stderr, "Cannot find position!\n");
    return 1;
  }
  printf("found: %ld %ld\n", pos, offset);

  BGZF* fp2 = bgzf_open(fBG, "rb");
  if (fp2 == NULL) {
    fprintf(stderr, "Cannot open file %s!\n", fBG);
    return 1;
  }
  if (bgzf_seek(fp2, offset, SEEK_SET)) {
    fprintf(stderr, "seek error!\n");
  }
  kstring_t* str;
  str = (kstring_t*)calloc(1, sizeof(kstring_t));
  kstring_t& s = *str;
  int ret = bgzf_getline(fp2, '\n', &s);
  if (ret <= 0) {
    fprintf(stderr, "getline error, ret = %d!\n", ret);
  }
  // Show at most the first 50 characters of the line.
  for (size_t i = 0; i < s.l; ++i) {
    if (i >= 50) break;
    printf("%c", s.s[i]);
  }
  printf("\n");

  free(s.s);  // line buffer grown by bgzf_getline
  free(str);
  bgzf_close(fp2);
  // fclose(fp);

  return 0;
}
Exemplo n.º 19
0
/*
 * OCaml stub: seeks the wrapped BGZF handle to `pos` (an OCaml int64),
 * measured with SEEK_SET. Raises Failure "BGZF.seek" when bgzf_seek()
 * returns a non-zero result; otherwise returns unit.
 */
value caml_bgzf_seek(value bgzf,value pos) {
	CAMLparam2(bgzf,pos);

	if(bgzf_seek(BGZF_val(bgzf),Int64_val(pos),SEEK_SET) != 0) {
		caml_failwith("BGZF.seek");
	}

	CAMLreturn(Val_unit);
}