Example #1
0
/*
  Read the files named in an already opened list file into 'ret'.

  fp     : open handle on the list file; file names are separated by
           tabs, spaces or newlines, tokens starting with '#' are skipped.
  nSites : maximum number of rows that will be written to ret.mat.
  nChr   : every site is stored on disk as nChr+1 doubles.
  ret    : ret.mat[i] receives exp() of the i'th site that was read,
           ret.y is set to nChr+1 and ret.x to the number of rows filled.

  Reading stops as soon as nSites rows are filled, so rows beyond that are
  left unread instead of being written past the end of ret.mat.
  Exits the program with status 0 if SIG_COND has become 0.
*/
void readGL2(gzFile fp,size_t nSites,int nChr,Matrix<double> &ret){
  //  fprintf(stderr,"[%s] fname:%s nSites:%d nChr:%d\n",__FUNCTION__,fname,nSites,nChr);
  const size_t nCols = nChr+1;
  const size_t rowBytes = sizeof(double)*nCols;
  ret.y=nCols;
  char buf[LENS];
  size_t i=0;
  while(i<nSites&&gzgets(fp,buf,LENS)){
    //strtok is called with buf the first time and with NULL afterwards
    for(char *tok = strtok(buf,"\t\n ");tok!=NULL&&i<nSites;tok=strtok(NULL,"\t\n ")){
      if(tok[0]=='#')
        continue;
      //open the current token; buf only names the first token of the line
      gzFile gz = getGz(tok);

      while(SIG_COND&&i<nSites){
        const int got = gzread(gz,ret.mat[i],rowBytes);
        if(got!=static_cast<int>(rowBytes)){
          //0 is end of file, <0 is a read error, anything else is a partial site
          if(got>0)
            fprintf(stderr,"Truncated site at end of file: \'%s\' (ignored)\n",tok);
          break;
        }
        for(size_t j=0;j<nCols;j++)
          ret.mat[i][j] = exp(ret.mat[i][j]);
        i++;
      }
      fprintf(stderr,"Done reading file: \'%s\'\n",tok);
      gzclose(gz);
    }
  }
  //set unconditionally so that an empty list reports zero rows
  ret.x=i;
  if(SIG_COND==0)
    exit(0);

}
Example #2
0
/*
  Open the file whose name is the concatenation of 'a' and 'b'.

  The joined name is also appended to dumpedFiles as a strdup() copy,
  so whoever releases those entries has to use free().
  The handle returned is whatever getGz(name,mode) gives back.
*/
gzFile openFileGz(const char* a,const char* b,const char *mode){
  if(0)
    fprintf(stderr,"[%s] %s%s\n",__FUNCTION__,a,b);

  const size_t lenA = strlen(a);
  const size_t lenB = strlen(b);

  //temporary buffer holding prefix+suffix and the terminating zero
  char *path = new char[lenA+lenB+1];
  memcpy(path,a,lenA);
  memcpy(path+lenA,b,lenB+1);

  dumpedFiles.push_back(strdup(path));
  gzFile handle = getGz(path,mode);

  delete [] path;
  return handle;
}
Example #3
0
/*
  Return the number of sites for 'fname'.

  If the global isList is 0 this is subfun(fname,nChr).
  Otherwise 'fname' is read as a list of file names (separated by tabs,
  spaces or newlines, tokens starting with '#' are skipped) and the
  subfun() results of all listed files are added up.
*/
size_t calcNsites(const char *fname,int nChr){
  //  fprintf(stderr,"[%s] fname: %s nChr: %d isList:%d\n",__FUNCTION__,fname,nChr,isList);
  if(isList==0)
    return subfun(fname,nChr);

  gzFile gz=getGz(fname);
  char buf[LENS];
  size_t tot=0;
  while(gzgets(gz,buf,LENS)){
    //  fprintf(stderr,"buf:%s\n",buf);
    for(char *tok = strtok(buf,"\n\t ");tok!=NULL;tok=strtok(NULL,"\n\t ")){
      if(tok[0]=='#')
        continue;
      //count the current token; buf only names the first token of the line
      tot += subfun(tok,nChr);
    }
  }
  gzclose(gz);
  return tot;
}
Example #4
0
/*
  splitgl: copy the likelihoods of a contiguous range of samples to stdout.

  Usage: ./splitgl file.glf.gz nindTotal firstInd lastInd

  The input is read in chunks of 10*nindTotal doubles (one chunk per site).
  For every chunk the 10 doubles of each sample in firstInd..lastInd
  (1-based, inclusive) are written to stdout as raw doubles.

  Returns 0 on success and when the usage text is printed, 1 when the
  sample range is invalid or when reading/writing fails.
*/
int main(int argc,char**argv){

  if(argc!=5){
    fprintf(stderr,"\nProgram extract ranges from output of msToGlf\n./splitgl file.glf.gz nindTotal firstInd lastInd\n");
    fprintf(stderr,"\nTo extract first the GLS for the first 10 samples in an glf.gz that contains 25 samples\n");
    fprintf(stderr,"Examples\n\n\t1) Extract sample 1 to 12 from a glf.gz file containing 20samples:\n\t\t\t./splitgl raw.glf.gz 20 1 12\n");
    fprintf(stderr,"\t2) Extract sample 13 to 20 from a glf.gz file containing 20samples:\n\t\t\t./splitgl raw.glf.gz 20 13 20\n");
    return 0;
  }

  const char *fname = argv[1];
  int tot = atoi( argv[2]);
  int first = atoi( argv[3]);
  int last = atoi( argv[4]);
  first--;last--;//from here on zero based

  fprintf(stderr,"fname:%s tot:%d first:%d last:%d\n",fname,tot,first,last);

  //a range outside [0,tot) would read outside the chunk buffer below
  if(tot<=0||first<0||last<first||last>=tot){
    fprintf(stderr,"Invalid sample range: need 1 <= firstInd <= lastInd <= nindTotal\n");
    return 1;
  }

  const size_t chunkBytes = 10*sizeof(double)*(size_t)tot;
  const size_t beg = 10*(size_t)first;
  const size_t nDoubles = 10*(size_t)(last-first+1);

  gzFile gz=getGz(fname,"r");
  //explicit cast so the line is valid as both C and C++
  double *lk = (double*)malloc(chunkBytes);
  if(lk==NULL){
    fprintf(stderr,"Problem allocating memory for one site\n");
    gzclose(gz);
    return 1;
  }

  int nsites =0;
  int status =0;
  while(1){
    int br=gzread(gz,lk,(unsigned)chunkBytes);
    if(br==0)//end of file
      break;
    if(br<0||(size_t)br!=chunkBytes){
      fprintf(stderr,"Problem reading full chunk\n");
      status=1;
      break;
    }
    //   fprintf(stderr,"beg: %d nDoubles:%d\n",beg,nDoubles);return 0;
    if(fwrite(lk+beg,sizeof(double),nDoubles,stdout)!=nDoubles){
      fprintf(stderr,"Problem writing full chunk\n");
      status=1;
      break;
    }
    nsites++;//only count sites that were read and written completely
  }
  if(fflush(stdout)!=0){
    fprintf(stderr,"Problem writing full chunk\n");
    status=1;
  }
  fprintf(stderr,"nsites:%d processed\n",nsites);
  free(lk);
  gzclose(gz);
  return status;
}
Example #5
0
/*
  Entry point for the 2dsfs mode.

  argv[0] is skipped, then argv holds: pop1 pop2 nChr1 nChr2 [options].
  The four positional values are stored in the globals fname1, fname2,
  chr1 and chr2; what is left is handed to getArgs().

  If nSites is still 0 after getArgs() it is set to calcNsites() of pop1,
  and the program exits if pop2 does not report the same number of sites.

  For every chunk that is read the resulting sfs is written to stdout as
  chr1+1 lines of chr2+1 log values. In list mode (isList==1) only one
  chunk is processed.

  Returns 0, except that 1 is returned when some but not all of the four
  positional arguments are given.
*/
int main_2dsfs(int argc,char **argv){
  //four positional arguments are consumed below, make sure they exist
  if(argc<5){
    fprintf(stderr,"./emOptim2 2dsfs pop1 pop2 nChr1 nChr2 [-start FNAME -P nThreds -tole tole -maxIter ] (only works if the two saf files covers the same region)\n");
    return argc==1?0:1;
  }
  argv++;
  argc--;
  fname1 = *(argv++);
  fname2 = *(argv++);
  argc -=2;
  chr1 = atoi(*(argv++));
  chr2 = atoi(*(argv++));
  argc -=2;
  getArgs(argc,argv);
  if(nSites==0){
    if(fsize(fname1)+fsize(fname2)>getTotalSystemMemory())
      fprintf(stderr,"Looks like you will allocate too much memory, consider starting the program with a lower -nSites argument\n");
    //this doesnt make sense if ppl supply a filelist containing safs

    //both populations must have the same number of sites, check it here
    //because nSites is nonzero from now on
    const size_t n1 = calcNsites(fname1,chr1);
    const size_t n2 = calcNsites(fname2,chr2);
    if(n1!=n2){
      fprintf(stderr,"Problem with number of sites in file: %s and %s\n",fname1,fname2);
      exit(0);
    }
    nSites=n1;
  }
  fprintf(stderr,"fname1:%sfname2:%s chr1:%d chr2:%d startsfs:%s nThreads=%d tole=%f maxIter=%d nSites:%lu\n",fname1,fname2,chr1,chr2,sfsfname,nThreads,tole,maxIter,nSites);
  //the division is done on integers, so whole megabytes only
  float bytes_req_megs = nSites*(sizeof(double)*(chr1+1) + sizeof(double)*(chr2+1)+2*sizeof(double*))/1024/1024;
  float mem_avail_megs = getTotalSystemMemory()/1024/1024;
  //  fprintf(stderr,"en:%zu to:%f\n",bytes_req_megs,mem_avail_megs);
  fprintf(stderr,"The choice of -nSites will require atleast: %f megabyte memory, that is approx: %.2f%% of total memory\n",bytes_req_megs,bytes_req_megs*100/mem_avail_megs);

#if 0
  //read in positions, not used, YET...
  std::vector<int> p1 = getPosi(fname1);
  std::vector<int> p2 = getPosi(fname2);
  fprintf(stderr,"nSites in pop1: %zu nSites in pop2: %zu\n",p1.size(),p2.size());
#endif

  gzFile gz1=getGz(fname1);
  gzFile gz2=getGz(fname2);

  dim=(chr1+1)*(chr2+1);

  Matrix<double> GL1=alloc(nSites,chr1+1);
  Matrix<double> GL2=alloc(nSites,chr2+1);
  dim=GL1.y*GL2.y;

  double *sfs = new double[dim];
  while(1){
    if(isList ==0){
      readGL(gz1,nSites,chr1,GL1);
      readGL(gz2,nSites,chr2,GL2);
    }else{
      readGL2(gz1,nSites,chr1,GL1);
      readGL2(gz2,nSites,chr2,GL2);
    }

    assert(GL1.x==GL2.x);
    if(GL1.x==0)//nothing more to read
      break;

    //starting point: user supplied sfs, or a normalized ramp
    if(sfsfname!=NULL){
      readSFS(sfsfname,dim,sfs);
    }else{
      for(int i=0;i<dim;i++)
        sfs[i] = (i+1)/((double)dim);
      normalize(sfs,dim);
    }

    setThreadPars(&GL1,&GL2,sfs,nThreads);
    if(calcLike==0){
      if(SIG_COND)
        em2(sfs,&GL1,&GL2,tole,maxIter);
    }
    //NOTE(review): the single threaded branch calls lik1 with GL1 only,
    //confirm that this is intended for the two population case
    double lik;
    if(nThreads>1)
      lik = lik1_master();
    else
      lik = lik1(sfs,&GL1,0,GL1.x);

    fprintf(stderr,"likelihood: %f\n",lik);

    //one line per pop1 category, one column per pop2 category
    int inc=0;
    for(int x=0;x<chr1+1;x++){
      for(int y=0;y<chr2+1;y++)
        fprintf(stdout,"%f ",log(sfs[inc++]));
      fprintf(stdout,"\n");
    }
    fflush(stdout);

    if(isList==1)
      break;
  }
  dalloc(GL1,nSites);
  dalloc(GL2,nSites);
  gzclose(gz1);
  gzclose(gz2);
  delete [] sfs;
  return 0;
}
Example #6
0
int main_1dsfs(int argc,char **argv){
  if(argc<2){
    fprintf(stderr,"Must supply afile.saf and number of chromosomes\n");
    return 0;
  }
  fname1 = *(argv++);
  chr1 = atoi(*(argv++));
  argc-=2;
 
  getArgs(argc,argv);
  dim=chr1+1;
  //hook for new EJ banded version
  if(isNewFormat(fname1))
    return main_1dsfs_v2(fname1,chr1,nSites,nThreads,sfsfname,tole,maxIter);

  if(nSites==0){//if no -nSites is specified
    if(fsize(fname1)>getTotalSystemMemory())
      fprintf(stderr,"Looks like you will allocate too much memory, consider starting the program with a lower -nSites argument\n");
    //this doesnt make sense if ppl supply a filelist containing safs
    nSites=calcNsites(fname1,chr1);
  }
  fprintf(stderr,"fname1:%s nChr:%d startsfs:%s nThreads:%d tole=%f maxIter=%d nSites=%lu\n",fname1,chr1,sfsfname,nThreads,tole,maxIter,nSites);
  float bytes_req_megs = nSites*(sizeof(double)*(chr1+1)+sizeof(double*))/1024/1024;
  float mem_avail_megs = getTotalSystemMemory()/1024/1024;//in percentile
  //  fprintf(stderr,"en:%zu to:%f\n",bytes_req_megs,mem_avail_megs);
  fprintf(stderr,"The choice of -nSites will require atleast: %f megabyte memory, that is approx: %.2f%% of total memory\n",bytes_req_megs,bytes_req_megs*100/mem_avail_megs);

  

  Matrix<double> GL1=alloc(nSites,dim);
  gzFile gz1=getGz(fname1);  
  double *sfs=new double[dim];
  
  while(1) {
    if(isList==0)
      readGL(gz1,nSites,chr1,GL1);
    else
      readGL2(gz1,nSites,chr1,GL1);
    if(GL1.x==0)
      break;
    fprintf(stderr,"dim(GL1)=%zu,%zu\n",GL1.x,GL1.y);
   
    
  
    if(sfsfname!=NULL){
      readSFS(sfsfname,dim,sfs);
    }else{
      
      for(int i=0;i<dim;i++)
	sfs[i] = (i+1)/((double)dim);
      if(doBFGS){
	double ts=1;
	for(int i=0;i<dim-1;i++)
	  ts += 0.01/(1.0+i);
	sfs[0]=1.0/ts;
	for(int i=0;i<dim-1;i++)
	  sfs[i+1]  = (0.01/(1.0+i))/ts;
      }
      normalize(sfs,dim);
    }
    //  em2_smart(sfs2,pops,1e-6,1e3);
    setThreadPars(&GL1,NULL,sfs,nThreads);
    if(calcLike==0){
      if(doBFGS==0) 
	em1(sfs,&GL1,tole,maxIter);
      else
	bfgs(sfs,&GL1);
    }
    double lik;
    if(nThreads>1)
      lik = lik1_master();
    else
      lik = lik1(sfs,&GL1,0,GL1.x);
      
    fprintf(stderr,"likelihood: %f\n",lik);
#if 1
    for(int x=0;x<dim;x++)
      fprintf(stdout,"%f ",log(sfs[x]));
    fprintf(stdout,"\n");
    fflush(stdout);
#endif
    if(isList==1)
      break;
  }
  dalloc(GL1,nSites);
  gzclose(gz1);
  delete [] sfs;
  return 0;
}
Example #7
0
int main(int argc, char **argv){

  fprintf(stderr,"NGSrelate buildtime: (%s:%s)\n",__DATE__,__TIME__);
  if(argc==1){// if no arguments, print info on program
    info();
    return 0;
  }
  //below for catching ctrl+c, and dumping files
  struct sigaction sa;
  sigemptyset (&sa.sa_mask);
  sa.sa_flags = 0;
  sa.sa_handler = handler;
  sigaction(SIGPIPE, &sa, 0);
  sigaction(SIGINT, &sa, 0);  

  //initial values
  char *bfile = NULL;
  char *binfile = NULL;
  char *outfiles = NULL;
  
  double a,k0,k1,k2;
  a=k0=k1=-1;
  k2=0;
  
  int calcA = 1;
  

  //filter opts
  double minMaf = 0.01;
  char *freqfile =NULL;
  // reading arguments
  argv++;
  while(*argv){
    if(strcmp(*argv,"-beagle")==0 )  bfile=*++argv; 
    else if(strcmp(*argv,"-bin")==0 )  binfile=*++argv; 
    else if(strcmp(*argv,"-freqfile")==0 )  freqfile=*++argv; 
    else if(strcmp(*argv,"-outnames")==0 )  outfiles=*++argv; 
    else if(strcmp(*argv,"-minMaf")==0 )  minMaf = atoi(*++argv); 
    else if(strcmp(*argv,"-a")==0 )  a = atof(*++argv); 
    else if(strcmp(*argv,"-k0")==0 )  k0 = atof(*++argv); 
    else if(strcmp(*argv,"-k1")==0 )  k1 = atof(*++argv); 
    else if(strcmp(*argv,"-k2")==0 )  k2 = atof(*++argv); 
    else if(strcmp(*argv,"-calcA")==0 )  calcA = atoi(*++argv); 
    else{
      fprintf(stderr,"Unknown arg:%s\n",*argv);
      info();
      return 0;
    }
    ++argv;
  }
  if(bfile==NULL&&binfile==NULL){
    fprintf(stderr,"Please supply input data file: -beagle OR -bin");
    info();
  }else if(bfile!=NULL&&binfile!=NULL){
    fprintf(stderr,"Please supply input data file: -beagle OR -bin");
    info();
  }
  if(outfiles==NULL){
    if(bfile!=NULL)
      outfiles=bfile;
    else
      outfiles = binfile;
    fprintf(stderr,"Will use: %s as prefix for output\n",outfiles);
   
  }
  FILE *flog=openFile(outfiles,".log");

  clock_t t=clock();//how long time does the run take
  time_t t2=time(NULL);
  
  std::vector<perChr> pd;
  if(bfile!=NULL){
    bgl d=readBeagle(bfile);
    fprintf(stderr,"Input beaglefile has dim: nsites=%d nind=%d\n",d.nSites,d.nInd);
    pd = makeDat(d);
    gzFile dfile = openFileGz(outfiles,".bin.gz","wb");
    for(uint i=0;i<pd.size();i++)
      marshall_dump(dfile,pd[i]);
    gzclose(dfile);
  }else{
    gzFile dfile = getGz(binfile,"rb");
    pd = marshall_read(dfile);

    gzclose(dfile);
  }
  //calculate frequencies
  for(uint i=0;i<pd.size();i++)
    setFreq(pd[i]);


  //  printStuff(pd);
  //set seed
  srand(seed);
  
  //below is the main looping trhought the iterations.
  FILE *fp =openFile(outfiles,".freq");
  for(int i=0;i<pd[0].nSites;i++)
    fprintf(fp,"%f ",exp(pd[0].freq[i]));
  fclose(fp);
  
  
  double *freq = NULL;
  if(freqfile!=NULL){
    freq = readDouble(freqfile,pd[0].nSites);
    fprintf(stderr,"freq=%f\n",freq[0]);
  }
  para p;
  p.pair[0] = 0;p.pair[1] = 1;
  p.a=a;p.k0=k0;p.k1=k1;p.k2=k2;
  fprintf(stderr,"pa=%f pk0=%f\n",p.a,p.k0);

  hmm res = analysis(pd[0],freq,p,calcA);
  gzFile bo = openFileGz(outfiles,".bres.gz","wb");
  fdump(bo,res,pd[0].name);
  

  //deallocate memory
  
  for(int i=0;1&&i<dumpedFiles.size();i++){
    fprintf(stderr,"dumpedfiles are: %s\n",dumpedFiles[i]);
    free(dumpedFiles[i]);
  }
  fprintf(stderr, "\t[ALL done] cpu-time used =  %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
  fprintf(stderr, "\t[ALL done] walltime used =  %.2f sec\n", (float)(time(NULL) - t2));  


  // print to log file

  fprintf(flog, "\t[ALL done] cpu-time used =  %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
  fprintf(flog, "\t[ALL done] walltime used =  %.2f sec\n", (float)(time(NULL) - t2));  
  fclose(flog); 
  gzclose(bo);
  
  return 0;

}