Ejemplo n.º 1
0
 /*
  * Load the data for chromosome `chr` into `pp` and select the site window.
  *
  * chr, start and stop are given from the command line.
  *
  * On success pp->pos holds nSites ints and pp->gls holds 2*nSites doubles
  * read from the two bgzf streams, and [pp->first, pp->last) is the index
  * range of sites with start <= pos < stop. A value of -1 for start or stop
  * disables that bound. The scan assumes pos[] is sorted ascending
  * (NOTE(review): not verifiable from this function -- confirm).
  *
  * Returns the map iterator for `chr`, or pp->mm.end() if the chromosome is
  * unknown (in which case `pp` is left untouched and a message is printed).
  */
 myMap::iterator iter_init(perpsmc *pp,char *chr,int start,int stop){
   assert(chr!=NULL);
   myMap::iterator it = pp->mm.find(chr);
   if(it==pp->mm.end()){
     fprintf(stderr,"\t-> [%s] Problem finding chr: %s\n",__FUNCTION__,chr);
     return it;
   }
   const size_t nSites = it->second.nSites;

   my_bgzf_seek(pp->bgzf_gls,it->second.saf,SEEK_SET);
   my_bgzf_seek(pp->bgzf_pos,it->second.pos,SEEK_SET);

   // Drop buffers from a previous chromosome; delete[] on NULL is a no-op.
   delete [] pp->pos;
   delete [] pp->gls;

   pp->pos = new int[nSites];
   my_bgzf_read(pp->bgzf_pos,pp->pos,sizeof(int)*nSites);
   pp->gls = new double[2*nSites];//<-valgrind complains about a large allocation here
   my_bgzf_read(pp->bgzf_gls,pp->gls,2*sizeof(double)*nSites);

   // First site at or after `start`.
   pp->first=0;
   if(start!=-1)
     while(pp->first<nSites&&pp->pos[pp->first]<start)
       pp->first++;

   // One past the last site before `stop`. The nSites>0 test protects the
   // pos[nSites-1] read, and the bound inside the loop protects the case
   // where `first` already sits at nSites (start beyond every position).
   pp->last = nSites;
   if(stop!=-1&&nSites>0&&stop<=pp->pos[nSites-1]){
     pp->last=pp->first;
     while(pp->last<nSites&&pp->pos[pp->last]<stop)
       pp->last++;
   }
   return it;
 }
Ejemplo n.º 2
0
/*
 * Open the index file `fname` and build the per-chromosome lookup table.
 *
 * The index is read as an 8-byte header followed by repeated records of:
 *   size_t clen, clen bytes of chromosome name, size_t nSites,
 *   int64_t offset into the position stream, int64_t offset into the gls stream.
 * Each record is stored in ret->mm under its chromosome name and nSites is
 * accumulated into ret->nSites.
 *
 * The two data files are derived from `fname` by replacing its last three
 * characters with "gz" and "pos.gz"; both are opened with bgzf_open and, if
 * opened, checked against the index version.
 *
 * Returns a newly allocated perpsmc. ret->gls and ret->pos are NULL;
 * ret->bgzf_gls / ret->bgzf_pos are whatever bgzf_open returned (possibly
 * NULL -- this function does not treat a failed open as fatal).
 * All error paths print to stderr and call exit(0).
 */
perpsmc * perpsmc_init(char *fname){
  perpsmc *ret = new perpsmc ;
  ret->fname = strdup(fname);
  ret->gls =NULL;
  ret->pos = NULL;
  ret->bgzf_pos=ret->bgzf_gls=NULL;
  size_t clen;
  if(!fexists(fname)){
    fprintf(stderr,"\t-> Problem opening file: \'%s\'\n",fname);
    exit(0);
  }
  FILE *fp = NULL;
  fp=fopen(fname,"r");
  if(fp==NULL){
    fprintf(stderr,"\t-> Problem opening file:%s\n",fname);
    exit(0);
  }
  // Skip the 8-byte header. The read is kept outside assert() because the
  // assert expression is not evaluated at all when NDEBUG is defined.
  char buf[8];
  size_t nHeader = fread(buf,1,8,fp);
  assert(nHeader==8);
  (void)nHeader;
  ret->version = psmcversion(fname);
  fprintf(stderr,"\t-> Version of fname: \'%s\' is:%d\n",fname,ret->version);
  if(ret->version!=1){
    fprintf(stderr,"\t-> Looks like you are trying to use a version of PSMC that does not exists\n");
    exit(0);
  }
  ret->nSites =0;
  // One record per chromosome until the length field can no longer be read.
  while(fread(&clen,sizeof(size_t),1,fp)){
    char *chr = (char*)malloc(clen+1);
    if(chr==NULL){
      fprintf(stderr,"\t-> Problem allocating memory for chromosome name in file: %s\n",fname);
      exit(0);
    }
    size_t nName = fread(chr,1,clen,fp);
    assert(clen==nName);
    (void)nName;
    chr[clen] = '\0';

    datum d;
    if(1!=fread(&d.nSites,sizeof(size_t),1,fp)){
      fprintf(stderr,"[%s.%s():%d] Problem reading data: %s \n",__FILE__,__FUNCTION__,__LINE__,fname);
      exit(0);
    }
    ret->nSites += d.nSites;
    if(1!=fread(&d.pos,sizeof(int64_t),1,fp)){
      fprintf(stderr,"[%s->%s():%d] Problem reading data: %s \n",__FILE__,__FUNCTION__,__LINE__,fname);
      exit(0);
    }
    if(1!=fread(&d.saf,sizeof(int64_t),1,fp)){
      fprintf(stderr,"[%s->%s():%d] Problem reading data: %s \n",__FILE__,__FUNCTION__,__LINE__,fname);
      exit(0);
    }

    // chr is not freed here: presumably the map keeps the char* itself as
    // its key -- confirm against the definition of myMap.
    myMap::iterator it = ret->mm.find(chr);
    if(it==ret->mm.end())
      ret->mm[chr] =d ;
    else{
      fprintf(stderr,"Problem with chr: %s, key already exists, psmc file needs to be sorted. (sort your -rf that you used for input)\n",chr);
      exit(0);
    }
  }
  fclose(fp);

  // Build the data file names from fname minus its last three characters.
  // The length is clamped so that a name shorter than three characters
  // cannot underflow into a huge copy length.
  const size_t flen = strlen(fname);
  const size_t stemLen = flen>=3 ? flen-3 : 0;
  char *tmp =(char*)calloc(flen+100,1);//that should do it
  memcpy(tmp,fname,stemLen);// calloc zero-filled the buffer, so tmp stays NUL-terminated

  char *tmp2 = (char*)calloc(flen+100,1);//that should do it
  snprintf(tmp2,flen+100,"%sgz",tmp);
  fprintf(stderr,"\t-> Assuming .psmc.gz file: %s\n",tmp2);
  ret->bgzf_gls = bgzf_open(tmp2,"r");
  if(ret->bgzf_gls)
    my_bgzf_seek(ret->bgzf_gls,8,SEEK_SET);
  if(ret->bgzf_gls && ret->version!=psmcversion(tmp2)){
    fprintf(stderr,"\t-> Problem with mismatch of version of %s vs %s %d vs %d\n",fname,tmp2,ret->version,psmcversion(tmp2));
    exit(0);
  }

  snprintf(tmp2,flen+100,"%spos.gz",tmp);
  fprintf(stderr,"\t-> Assuming .psmc.pos.gz: %s\n",tmp2);
  ret->bgzf_pos = bgzf_open(tmp2,"r");
  // Test the stream handle, not ret->pos: ret->pos is always NULL at this
  // point, so testing it meant the seek never ran.
  if(ret->bgzf_pos)
    my_bgzf_seek(ret->bgzf_pos,8,SEEK_SET);
  if(ret->bgzf_pos&& ret->version!=psmcversion(tmp2)){
    fprintf(stderr,"Problem with mismatch of version of %s vs %s\n",fname,tmp2);
    exit(0);
  }
  free(tmp);free(tmp2);

  return ret;
}
Ejemplo n.º 3
0
/*
 * Print global Fst estimates for every population pair in an fst index.
 *
 * argv[0] is the index name; the remaining arguments are parsed by getArgs
 * (chooseChr, start, stop are used here; -1 disables start/stop).
 *
 * For every chromosome (or only pars->chooseChr when set) the per-site
 * numerator (ares) and denominator (bres) arrays of each pair are read, and
 * the sites in [start, stop) are accumulated into:
 *   - unweighted Fst: mean of ares/bres over sites with bres != 0
 *   - weighted Fst:   sum(ares)/sum(bres)
 * Both are written to stdout (one line per pair) and echoed to stderr.
 * With exactly three pairs, calcpbs() is also run on the weighted values.
 *
 * Always returns 0.
 */
int fst_stat(int argc,char **argv){

  char *bname = *argv;
  fprintf(stderr,"\t-> Assuming idxname:%s\n",bname);
  perfst *pf = perfst_init(bname);
  args *pars = getArgs(--argc,++argv);
  int *ppos = NULL;
  // Number of population pairs; computed once and reused by every loop.
  int chs = choose((int)pf->names.size(),2);
  double **ares = new double*[chs];
  double **bres = new double*[chs];
  double unweight[chs];
  double wa[chs];
  double wb[chs];
  size_t nObs[chs];
  for(int i=0;i<chs;i++){
    unweight[i] = wa[i] = wb[i] =0.0;
    nObs[i] = 0;
  }
  for(myFstMap::iterator it=pf->mm.begin();it!=pf->mm.end();++it){
    if(pars->chooseChr!=NULL){
      it = pf->mm.find(pars->chooseChr);
      if(it==pf->mm.end()){
        fprintf(stderr,"Problem finding chr: %s\n",pars->chooseChr);
        break;
      }
    }
    const size_t nSites = it->second.nSites;
    if(nSites==0){
      // With a chosen chromosome the iterator is reset by find() on every
      // pass, so `continue` here could loop forever; stop instead.
      if(pars->chooseChr!=NULL)
        break;
      continue;
    }
    my_bgzf_seek(pf->fp,it->second.off,SEEK_SET);
    ppos = new int[nSites];

    my_bgzf_read(pf->fp,ppos,sizeof(int)*nSites);
    for(int i=0;i<chs;i++){
      ares[i] = new double[nSites];
      bres[i] = new double[nSites];
      my_bgzf_read(pf->fp,ares[i],sizeof(double)*nSites);
      my_bgzf_read(pf->fp,bres[i],sizeof(double)*nSites);
    }

    // First site at or after start; bounded so that a start beyond the last
    // position cannot run off the end of ppos.
    size_t first=0;
    if(pars->start!=-1)
      while(first<nSites&&ppos[first]<pars->start)
        first++;

    // One past the last site before stop.
    size_t last=nSites;
    if(pars->stop!=-1&&pars->stop<=ppos[last-1]){
      last=first;
      while(last<nSites&&ppos[last]<pars->stop)
        last++;
    }

    for(size_t s=first;s<last;s++){
      for(int i=0;i<chs;i++){
        // Sites with a zero denominator are left out of the unweighted mean.
        if(bres[i][s]!=0){
          unweight[i] += ares[i][s]/bres[i][s];
          nObs[i]++;
        }
        wa[i] += ares[i][s];
        wb[i] += bres[i][s];
      }
    }
    for(int i=0;i<chs;i++){
      delete [] ares[i];
      delete [] bres[i];
    }

    delete [] ppos;

    if(pars->chooseChr!=NULL)
      break;
  }
  double fstUW[chs];
  double fstW[chs];
  for(int i=0;i<chs;i++){
    fstUW[i] = unweight[i]/(1.0*((double)nObs[i]));
    fstW[i] = wa[i]/wb[i];
    fprintf(stderr,"\t-> FST.Unweight[nObs:%lu]:%f Fst.Weight:%f\n",(unsigned long)nObs[i],fstUW[i],fstW[i]);
    fprintf(stdout,"%f %f\n",fstUW[i],fstW[i]);
  }
  if(chs==3){
    // Three pairs means three populations, so the pbs statistics are also calculated.
    calcpbs(fstW);//<- NOTE: the pbs values will replace the fstW values
    for(int i=0;i<3;i++)
      fprintf(stderr,"\t-> pbs.pop%d\t%f\n",i+1,fstW[i]);
  }
  delete [] ares;
  delete [] bres;
  destroy_args(pars);
  perfst_destroy(pf);
  return 0;
}
Ejemplo n.º 4
0
/*
 * Dump the per-site Fst numerators and denominators of an fst index.
 *
 * argv[0] is the index name; the remaining arguments are parsed by getArgs
 * (chooseChr, start, stop are used here; -1 disables start/stop).
 *
 * For every chromosome (or only pars->chooseChr when set) one line per site
 * in [start, stop) is written to stdout:
 *   chr <tab> position+1 <tab> a <tab> b   (a/b repeated for each pair)
 * The header and a few diagnostics go to stderr.
 *
 * Always returns 0.
 */
int fst_print(int argc,char **argv){

  char *bname = *argv;
  fprintf(stderr,"\t-> Assuming idxname:%s\n",bname);
  perfst *pf = perfst_init(bname);
  writefst_header(stderr,pf);
  args *pars = getArgs(--argc,++argv);
  int *ppos = NULL;
  // Number of population pairs; computed once and reused by every loop.
  const int chs = choose((int)pf->names.size(),2);
  fprintf(stderr,"choose:%d \n",chs);
  double **ares = new double*[chs];
  double **bres = new double*[chs];
  for(myFstMap::iterator it=pf->mm.begin();it!=pf->mm.end();++it){
    if(pars->chooseChr!=NULL){
      it = pf->mm.find(pars->chooseChr);
      if(it==pf->mm.end()){
        fprintf(stderr,"Problem finding chr: %s\n",pars->chooseChr);
        break;
      }
    }
    const size_t nSites = it->second.nSites;
    if(nSites==0){
      // With a chosen chromosome the iterator is reset by find() on every
      // pass, so `continue` here could loop forever; stop instead.
      if(pars->chooseChr!=NULL)
        break;
      continue;
    }
    my_bgzf_seek(pf->fp,it->second.off,SEEK_SET);
    ppos = new int[nSites];

    my_bgzf_read(pf->fp,ppos,sizeof(int)*nSites);
    for(int i=0;i<chs;i++){
      ares[i] = new double[nSites];
      bres[i] = new double[nSites];
      my_bgzf_read(pf->fp,ares[i],sizeof(double)*nSites);
      my_bgzf_read(pf->fp,bres[i],sizeof(double)*nSites);
    }

    // First site at or after start; bounded so that a start beyond the last
    // position cannot run off the end of ppos.
    size_t first=0;
    if(pars->start!=-1)
      while(first<nSites&&ppos[first]<pars->start)
        first++;

    // One past the last site before stop.
    size_t last=nSites;
    if(pars->stop!=-1&&pars->stop<=ppos[last-1]){
      last=first;
      while(last<nSites&&ppos[last]<pars->stop)
        last++;
    }

    // last can be 0 (stop at or before the first position); report -1 then
    // instead of reading ppos[-1].
    fprintf(stderr,"pars->stop:%d ppos:%d first:%lu last:%lu\n",pars->stop,last>0?ppos[last-1]:-1,(unsigned long)first,(unsigned long)last);

    for(size_t s=first;s<last;s++){
      fprintf(stdout,"%s\t%d",it->first,ppos[s]+1);
      for(int i=0;i<chs;i++)
        fprintf(stdout,"\t%f\t%f",ares[i][s],bres[i][s]);
      fprintf(stdout,"\n");
    }
    for(int i=0;i<chs;i++){
      delete [] ares[i];
      delete [] bres[i];
    }

    delete [] ppos;

    if(pars->chooseChr!=NULL)
      break;
  }
  delete [] ares;
  delete [] bres;
  destroy_args(pars);
  perfst_destroy(pf);
  return 0;
}
Ejemplo n.º 5
0
/*
 * Sliding-window Fst (and PBS for three populations) over an fst index.
 *
 * argv[0] is the index name; the remaining arguments are parsed by getArgs
 * (chooseChr, start, stop, win, step, type are used here; -1 disables
 * start/stop).
 *
 * pars->type selects where the first window starts:
 *   0: first multiple of step above start (or above the first position)
 *   1: start (or the first position)
 *   2: position 1
 * Windows are [pS, pS+win) and advance by step until the window end passes
 * stop (or the last position). For each non-empty window one line is written
 * to stdout: region, chr, window midpoint, number of sites, the weighted Fst
 * sum(a)/sum(b) of each pair, and with exactly three pairs the PBS values.
 *
 * Returns 0 on success, 1 when type or step is unusable.
 */
int fst_stat2(int argc,char **argv){
  int pS=0,pE=0;//physical start,physical end
  int begI,endI;//position in array for pS, pE;

  char *bname = *argv;
  fprintf(stderr,"\t-> Assuming idxname:%s\n",bname);
  perfst *pf = perfst_init(bname);
  args *pars = getArgs(--argc,++argv);
  fprintf(stderr,"win:%d step:%d\n",pars->win,pars->step);

  // Reject settings that would leave pS undefined (unknown type) or make the
  // window loop divide by zero / never advance (step <= 0).
  if(pars->type!=0&&pars->type!=1&&pars->type!=2){
    fprintf(stderr,"\t-> Unsupported window type:%d (expected 0, 1 or 2)\n",(int)pars->type);
    destroy_args(pars);
    perfst_destroy(pf);
    return 1;
  }
  if(pars->step<=0){
    fprintf(stderr,"\t-> Window step must be positive, got:%d\n",pars->step);
    destroy_args(pars);
    perfst_destroy(pf);
    return 1;
  }

  int *ppos = NULL;
  const int nPops = (int)pf->names.size();
  // Number of population pairs; computed once and reused by every loop.
  int chs = choose(nPops,2);
  double **ares = new double*[chs];
  double **bres = new double*[chs];
  double unweight[chs];
  double wa[chs];
  double wb[chs];
  size_t nObs =0;


  //print header
  // One Fst column per pair of populations. The pairs are enumerated over
  // the populations, not over chs, so the label count matches the number of
  // values for any population count (the two only coincide for three).
  // NOTE(review): assumes pairs are stored in (0,1),(0,2),...,(1,2),... order -- confirm.
  fprintf(stdout,"region\tchr\tmidPos\tNsites");
  for(int c1=0;c1<nPops-1;c1++)
    for(int c2=c1+1;c2<nPops;c2++)
      fprintf(stdout,"\tFst%d%d",c1,c2);
  if(chs==3)
    fprintf(stdout,"\tPBS0\tPBS1\tPBS2");

  fprintf(stdout,"\n");


  for(myFstMap::iterator it=pf->mm.begin();it!=pf->mm.end();++it){
    if(pars->chooseChr!=NULL){
      it = pf->mm.find(pars->chooseChr);
      if(it==pf->mm.end()){
        fprintf(stderr,"Problem finding chr: %s\n",pars->chooseChr);
        break;
      }
    }
    const size_t nSites = it->second.nSites;
    fprintf(stderr,"nSites:%lu\n",(unsigned long)nSites);
    if(nSites==0){
      if(pars->chooseChr!=NULL)
        break;
      continue;
    }
    my_bgzf_seek(pf->fp,it->second.off,SEEK_SET);
    ppos = new int[nSites];

    my_bgzf_read(pf->fp,ppos,sizeof(int)*nSites);
    // Shift every stored position by one (presumably 0-based on disk to
    // 1-based window coordinates -- confirm).
    for(size_t i=0;i<nSites;i++)
      ppos[i]++;

    for(int i=0;i<chs;i++){
      ares[i] = new double[nSites];
      bres[i] = new double[nSites];
      my_bgzf_read(pf->fp,ares[i],sizeof(double)*nSites);
      my_bgzf_read(pf->fp,bres[i],sizeof(double)*nSites);
    }


    if(pars->type==0)
      pS = ((pars->start!=-1?pars->start:ppos[0])/pars->step)*pars->step +pars->step;
    else if(pars->type==1)
      pS = (pars->start!=-1?pars->start:ppos[0]);
    else if(pars->type==2)
      pS = 1;
    pE = pS+pars->win;
    begI=endI=0;

    const int lastPos = ppos[nSites-1];
    const int regionEnd = (pars->stop!=-1?pars->stop:lastPos);
    if(pE>regionEnd){
      // Only a warning: the first window is still evaluated below.
      fprintf(stderr,"end of dataset is before end of window: end of window:%d last position in chr:%d\n",pE,lastPos);
    }

    // Every scan is bounded by nSites so a window lying past the last
    // position yields an empty range instead of reading beyond ppos.
    while((size_t)begI<nSites&&ppos[begI]<pS) begI++;

    endI=begI;
    while((size_t)endI<nSites&&ppos[endI]<pE) endI++;

    while(1){
      for(int i=0;i<chs;i++)
        unweight[i] = wa[i] = wb[i] =0.0;
      nObs=0;

      for(int s=begI;s<endI;s++){
        for(int i=0;i<chs;i++){
          // unweight is accumulated but not printed; zero denominators are skipped.
          if(bres[i][s]!=0)
            unweight[i] += ares[i][s]/bres[i][s];
          wa[i] += ares[i][s];
          wb[i] += bres[i][s];
        }
        nObs++;
      }
      // The window covers indices [begI,endI), so it holds endI-begI sites.
      if(nObs>0)
        fprintf(stdout,"(%d,%d)(%d,%d)(%d,%d)\t%s\t%d\t%d",begI,endI-1,ppos[begI],ppos[endI-1],pS,pE,it->first,pS+(pE-pS)/2,endI-begI);
      double fstW[chs];
      for(int i=0;nObs>0&&i<chs;i++){
        fstW[i] = wa[i]/wb[i];
        fprintf(stdout,"\t%f",fstW[i]);
      }
      if(nObs>0&&chs==3){
        // Three pairs means three populations, so the pbs statistics are also calculated.
        calcpbs(fstW);//<- NOTE: the pbs values will replace the fstW values
        for(int i=0;i<3;i++)
          fprintf(stdout,"\t%f",fstW[i]);
      }
      if(nObs>0)
        fprintf(stdout,"\n");

      pS += pars->step;
      pE =pS+pars->win;
      if(pE>regionEnd)
        break;

      while((size_t)begI<nSites&&ppos[begI]<pS) begI++;
      while((size_t)endI<nSites&&ppos[endI]<pE) endI++;
    }
    for(int i=0;i<chs;i++){
      delete [] ares[i];
      delete [] bres[i];
    }

    delete [] ppos;

    if(pars->chooseChr!=NULL)
      break;
  }

  delete [] ares;
  delete [] bres;
  destroy_args(pars);
  perfst_destroy(pf);
  return 0;
}