/*
  Read likelihood rows from every file named in an already opened list file.

  fp     : list file; every whitespace separated token on every line is taken
           as the name of a gz file, tokens starting with '#' are skipped.
  nSites : maximum number of rows that may be stored in ret.mat.
  nChr   : each row holds nChr+1 doubles.
  ret    : destination; ret.y is set to nChr+1 and ret.x to the number of rows
           actually stored. Every stored value is passed through exp().

  Reading stops when nSites rows are stored, so files holding more rows than
  the caller asked for no longer write past the end of ret.mat.
  If SIG_COND is 0 when reading ends, the process exits with status 0.
*/
void readGL2(gzFile fp,size_t nSites,int nChr,Matrix<double> &ret){
  const size_t rowLen = (size_t)nChr+1;
  const size_t rowBytes = sizeof(double)*rowLen;

  ret.y=nChr+1;
  char buf[LENS];
  size_t i=0;

  while(gzgets(fp,buf,LENS)){
    //strange construct for strtok, because first call is with buf, rest with NULL
    for(char *tok = strtok(buf,"\t\n ");tok!=NULL;tok=strtok(NULL,"\t\n ")){
      if(tok[0]=='#')
        continue;

      //open the current token, not buf: buf only ever names the first token of the line
      gzFile gz = getGz(tok);
      while(SIG_COND&&i<nSites){
        int got = gzread(gz,ret.mat[i],rowBytes);
        if(got<=0)//end of file or read error
          break;
        if((size_t)got!=rowBytes){
          //a partial row would leave stale values behind, so it is dropped
          fprintf(stderr,"Truncated row in file: \'%s\' (got %d of %lu bytes)\n",tok,got,(unsigned long)rowBytes);
          break;
        }
        for(size_t j=0;j<rowLen;j++)
          ret.mat[i][j] = exp(ret.mat[i][j]);
        i++;
      }
      fprintf(stderr,"Done reading file: \'%s\'\n",tok);
      gzclose(gz);
      ret.x=i;
    }
  }
  //also covers a list without any usable file name: report zero rows instead of a stale count
  ret.x=i;

  if(SIG_COND==0)
    exit(0);
}
/*
  Open the gz file whose name is the concatenation of 'a' and 'b'.

  The joined name is handed to getGz together with 'mode', and a strdup'ed
  copy of it is appended to dumpedFiles. The temporary name buffer is
  released before returning; the handle from getGz is returned unchanged.
*/
gzFile openFileGz(const char* a,const char* b,const char *mode){
  if(0)
    fprintf(stderr,"[%s] %s%s\n",__FUNCTION__,a,b);

  const size_t prefixLen = strlen(a);
  const size_t suffixLen = strlen(b);

  //prefix + suffix + terminating NUL
  char *path = new char[prefixLen+suffixLen+1];
  memcpy(path,a,prefixLen);
  memcpy(path+prefixLen,b,suffixLen+1);//the +1 copies the NUL of b

  dumpedFiles.push_back(strdup(path));

  gzFile handle = getGz(path,mode);
  delete [] path;
  return handle;
}
size_t calcNsites(const char *fname,int nChr){ // fprintf(stderr,"[%s] fname: %s nChr: %d isList:%d\n",__FUNCTION__,fname,nChr,isList); if(isList==0) return subfun(fname,nChr); else{ gzFile gz=getGz(fname); char buf[LENS]; size_t tot=0; while(gzgets(gz,buf,LENS)){ // fprintf(stderr,"buf:%s\n",buf); for(char *tok = strtok(buf,"\n\t ");tok!=NULL;tok=strtok(NULL,"\n\t ")){ if(tok[0]=='#') continue; size_t per_file = subfun(buf,nChr); tot+=per_file; } } gzclose(gz); return tot; } }
/*
  splitgl: copy the values of a range of samples from a glf.gz file to stdout.

  Usage: ./splitgl file.glf.gz nindTotal firstInd lastInd

  Every record read from the input holds 10 doubles per sample for nindTotal
  samples. For each record the doubles of samples firstInd..lastInd (1-based,
  inclusive) are written in binary form to stdout.

  Returns 0 after printing the usage text or on success, 1 when the sample
  range is invalid, the input cannot be opened, memory cannot be allocated,
  or a record cannot be read or written completely.
*/
int main(int argc,char**argv){
  if(argc!=5){
    fprintf(stderr,"\nProgram extract ranges from output of msToGlf\n./splitgl file.glf.gz nindTotal firstInd lastInd\n");
    fprintf(stderr,"\nTo extract first the GLS for the first 10 samples in an glf.gz that contains 25 samples\n");
    fprintf(stderr,"Examples\n\n\t1) Extract sample 1 to 12 from a glf.gz file containing 20samples:\n\t\t\t./splitgl raw.glf.gz 20 1 12\n");
    fprintf(stderr,"\t2) Extract sample 13 to 20 from a glf.gz file containing 20samples:\n\t\t\t./splitgl raw.glf.gz 20 13 20\n");
    return 0;
  }

  const char *fname = argv[1];
  int tot = atoi( argv[2]);
  int first = atoi( argv[3]);
  int last = atoi( argv[4]);

  /* an out of range request would read outside the record buffer below */
  if(tot<1||first<1||last<first||last>tot){
    fprintf(stderr,"Invalid sample range: nindTotal:%d firstInd:%d lastInd:%d (need 1<=firstInd<=lastInd<=nindTotal)\n",tot,first,last);
    return 1;
  }
  first--;last--; /* switch to 0-based sample indices */
  fprintf(stderr,"fname:%s tot:%d first:%d last:%d\n",fname,tot,first,last);

  gzFile gz=getGz(fname,"r");
  if(gz==NULL){
    fprintf(stderr,"Problem opening file: %s\n",fname);
    return 1;
  }

  const size_t recordBytes = sizeof(double)*10*(size_t)tot;
  double *lk = (double*) malloc(recordBytes);
  if(lk==NULL){
    fprintf(stderr,"Problem allocating %lu bytes\n",(unsigned long)recordBytes);
    gzclose(gz);
    return 1;
  }

  /* offset and length, in doubles, of the requested samples inside a record */
  const size_t beg = (size_t)10*first;
  const size_t nDoubles = (size_t)10*(last-first+1);

  int nsites = 0;
  int status = 0;
  while(1){
    int br=gzread(gz,lk,recordBytes);
    if(br==0) /* end of input */
      break;
    if(br<0||(size_t)br!=recordBytes){
      fprintf(stderr,"Problem reading full chunk\n");
      status = 1;
      break;
    }
    if(fwrite(lk+beg,sizeof(double),nDoubles,stdout)!=nDoubles){
      fprintf(stderr,"Problem writing full chunk\n");
      status = 1;
      break;
    }
    nsites++; /* counted only after the record was copied completely */
  }

  if(status==0)
    fprintf(stderr,"nsites:%d processed\n",nsites);
  free(lk);
  gzclose(gz);
  return status;
}
int main_2dsfs(int argc,char **argv){ if(argc==1){ fprintf(stderr,"./emOptim2 2dsfs pop1 pop2 nChr1 nChr2 [-start FNAME -P nThreds -tole tole -maxIter ] (only works if the two saf files covers the same region)\n"); return 0; } argv++; argc--; fname1 = *(argv++); fname2 = *(argv++); argc -=2; chr1 = atoi(*(argv++)); chr2 = atoi(*(argv++)); argc -=2; getArgs(argc,argv); if(nSites==0){ if(fsize(fname1)+fsize(fname2)>getTotalSystemMemory()) fprintf(stderr,"Looks like you will allocate too much memory, consider starting the program with a lower -nSites argument\n"); //this doesnt make sense if ppl supply a filelist containing safs nSites=calcNsites(fname1,chr1); } fprintf(stderr,"fname1:%sfname2:%s chr1:%d chr2:%d startsfs:%s nThreads=%d tole=%f maxIter=%d nSites:%lu\n",fname1,fname2,chr1,chr2,sfsfname,nThreads,tole,maxIter,nSites); float bytes_req_megs = nSites*(sizeof(double)*(chr1+1) + sizeof(double)*(chr2+1)+2*sizeof(double*))/1024/1024; float mem_avail_megs = getTotalSystemMemory()/1024/1024;//in percentile // fprintf(stderr,"en:%zu to:%f\n",bytes_req_megs,mem_avail_megs); fprintf(stderr,"The choice of -nSites will require atleast: %f megabyte memory, that is approx: %.2f%% of total memory\n",bytes_req_megs,bytes_req_megs*100/mem_avail_megs); #if 0 //read in positions, not used, YET... 
std::vector<int> p1 = getPosi(fname1); std::vector<int> p2 = getPosi(fname2); fprintf(stderr,"nSites in pop1: %zu nSites in pop2: %zu\n",p1.size(),p2.size()); #endif if(nSites==0){ if(calcNsites(fname1,chr1)!=calcNsites(fname2,chr2)){ fprintf(stderr,"Problem with number of sites in file: %s and %s\n",fname1,fname2); exit(0); } nSites=calcNsites(fname1,chr1); } gzFile gz1=getGz(fname1); gzFile gz2=getGz(fname2); dim=(chr1+1)*(chr2+1); Matrix<double> GL1=alloc(nSites,chr1+1); Matrix<double> GL2=alloc(nSites,chr2+1); dim=GL1.y*GL2.y; double *sfs = new double[dim]; while(1){ if(isList ==0){ readGL(gz1,nSites,chr1,GL1); readGL(gz2,nSites,chr2,GL2); }else{ readGL2(gz1,nSites,chr1,GL1); readGL2(gz2,nSites,chr2,GL2); } assert(GL1.x==GL2.x); if(GL1.x==0) break; if(sfsfname!=NULL){ readSFS(sfsfname,dim,sfs); }else{ for(int i=0;i<dim;i++) sfs[i] = (i+1)/((double)dim); normalize(sfs,dim); } setThreadPars(&GL1,&GL2,sfs,nThreads); if(calcLike==0){ if(SIG_COND) em2(sfs,&GL1,&GL2,tole,maxIter); } double lik; if(nThreads>1) lik = lik1_master(); else lik = lik1(sfs,&GL1,0,GL1.x); fprintf(stderr,"likelihood: %f\n",lik); #if 1 int inc=0; for(int x=0;x<chr1+1;x++){ for(int y=0;y<chr2+1;y++) fprintf(stdout,"%f ",log(sfs[inc++])); fprintf(stdout,"\n"); } #endif if(isList==1) break; } dalloc(GL1,nSites); dalloc(GL2,nSites); gzclose(gz1); gzclose(gz2); return 0; }
int main_1dsfs(int argc,char **argv){ if(argc<2){ fprintf(stderr,"Must supply afile.saf and number of chromosomes\n"); return 0; } fname1 = *(argv++); chr1 = atoi(*(argv++)); argc-=2; getArgs(argc,argv); dim=chr1+1; //hook for new EJ banded version if(isNewFormat(fname1)) return main_1dsfs_v2(fname1,chr1,nSites,nThreads,sfsfname,tole,maxIter); if(nSites==0){//if no -nSites is specified if(fsize(fname1)>getTotalSystemMemory()) fprintf(stderr,"Looks like you will allocate too much memory, consider starting the program with a lower -nSites argument\n"); //this doesnt make sense if ppl supply a filelist containing safs nSites=calcNsites(fname1,chr1); } fprintf(stderr,"fname1:%s nChr:%d startsfs:%s nThreads:%d tole=%f maxIter=%d nSites=%lu\n",fname1,chr1,sfsfname,nThreads,tole,maxIter,nSites); float bytes_req_megs = nSites*(sizeof(double)*(chr1+1)+sizeof(double*))/1024/1024; float mem_avail_megs = getTotalSystemMemory()/1024/1024;//in percentile // fprintf(stderr,"en:%zu to:%f\n",bytes_req_megs,mem_avail_megs); fprintf(stderr,"The choice of -nSites will require atleast: %f megabyte memory, that is approx: %.2f%% of total memory\n",bytes_req_megs,bytes_req_megs*100/mem_avail_megs); Matrix<double> GL1=alloc(nSites,dim); gzFile gz1=getGz(fname1); double *sfs=new double[dim]; while(1) { if(isList==0) readGL(gz1,nSites,chr1,GL1); else readGL2(gz1,nSites,chr1,GL1); if(GL1.x==0) break; fprintf(stderr,"dim(GL1)=%zu,%zu\n",GL1.x,GL1.y); if(sfsfname!=NULL){ readSFS(sfsfname,dim,sfs); }else{ for(int i=0;i<dim;i++) sfs[i] = (i+1)/((double)dim); if(doBFGS){ double ts=1; for(int i=0;i<dim-1;i++) ts += 0.01/(1.0+i); sfs[0]=1.0/ts; for(int i=0;i<dim-1;i++) sfs[i+1] = (0.01/(1.0+i))/ts; } normalize(sfs,dim); } // em2_smart(sfs2,pops,1e-6,1e3); setThreadPars(&GL1,NULL,sfs,nThreads); if(calcLike==0){ if(doBFGS==0) em1(sfs,&GL1,tole,maxIter); else bfgs(sfs,&GL1); } double lik; if(nThreads>1) lik = lik1_master(); else lik = lik1(sfs,&GL1,0,GL1.x); fprintf(stderr,"likelihood: %f\n",lik); #if 
1 for(int x=0;x<dim;x++) fprintf(stdout,"%f ",log(sfs[x])); fprintf(stdout,"\n"); fflush(stdout); #endif if(isList==1) break; } dalloc(GL1,nSites); gzclose(gz1); delete [] sfs; return 0; }
int main(int argc, char **argv){ fprintf(stderr,"NGSrelate buildtime: (%s:%s)\n",__DATE__,__TIME__); if(argc==1){// if no arguments, print info on program info(); return 0; } //below for catching ctrl+c, and dumping files struct sigaction sa; sigemptyset (&sa.sa_mask); sa.sa_flags = 0; sa.sa_handler = handler; sigaction(SIGPIPE, &sa, 0); sigaction(SIGINT, &sa, 0); //initial values char *bfile = NULL; char *binfile = NULL; char *outfiles = NULL; double a,k0,k1,k2; a=k0=k1=-1; k2=0; int calcA = 1; //filter opts double minMaf = 0.01; char *freqfile =NULL; // reading arguments argv++; while(*argv){ if(strcmp(*argv,"-beagle")==0 ) bfile=*++argv; else if(strcmp(*argv,"-bin")==0 ) binfile=*++argv; else if(strcmp(*argv,"-freqfile")==0 ) freqfile=*++argv; else if(strcmp(*argv,"-outnames")==0 ) outfiles=*++argv; else if(strcmp(*argv,"-minMaf")==0 ) minMaf = atoi(*++argv); else if(strcmp(*argv,"-a")==0 ) a = atof(*++argv); else if(strcmp(*argv,"-k0")==0 ) k0 = atof(*++argv); else if(strcmp(*argv,"-k1")==0 ) k1 = atof(*++argv); else if(strcmp(*argv,"-k2")==0 ) k2 = atof(*++argv); else if(strcmp(*argv,"-calcA")==0 ) calcA = atoi(*++argv); else{ fprintf(stderr,"Unknown arg:%s\n",*argv); info(); return 0; } ++argv; } if(bfile==NULL&&binfile==NULL){ fprintf(stderr,"Please supply input data file: -beagle OR -bin"); info(); }else if(bfile!=NULL&&binfile!=NULL){ fprintf(stderr,"Please supply input data file: -beagle OR -bin"); info(); } if(outfiles==NULL){ if(bfile!=NULL) outfiles=bfile; else outfiles = binfile; fprintf(stderr,"Will use: %s as prefix for output\n",outfiles); } FILE *flog=openFile(outfiles,".log"); clock_t t=clock();//how long time does the run take time_t t2=time(NULL); std::vector<perChr> pd; if(bfile!=NULL){ bgl d=readBeagle(bfile); fprintf(stderr,"Input beaglefile has dim: nsites=%d nind=%d\n",d.nSites,d.nInd); pd = makeDat(d); gzFile dfile = openFileGz(outfiles,".bin.gz","wb"); for(uint i=0;i<pd.size();i++) marshall_dump(dfile,pd[i]); gzclose(dfile); }else{ 
gzFile dfile = getGz(binfile,"rb"); pd = marshall_read(dfile); gzclose(dfile); } //calculate frequencies for(uint i=0;i<pd.size();i++) setFreq(pd[i]); // printStuff(pd); //set seed srand(seed); //below is the main looping trhought the iterations. FILE *fp =openFile(outfiles,".freq"); for(int i=0;i<pd[0].nSites;i++) fprintf(fp,"%f ",exp(pd[0].freq[i])); fclose(fp); double *freq = NULL; if(freqfile!=NULL){ freq = readDouble(freqfile,pd[0].nSites); fprintf(stderr,"freq=%f\n",freq[0]); } para p; p.pair[0] = 0;p.pair[1] = 1; p.a=a;p.k0=k0;p.k1=k1;p.k2=k2; fprintf(stderr,"pa=%f pk0=%f\n",p.a,p.k0); hmm res = analysis(pd[0],freq,p,calcA); gzFile bo = openFileGz(outfiles,".bres.gz","wb"); fdump(bo,res,pd[0].name); //deallocate memory for(int i=0;1&&i<dumpedFiles.size();i++){ fprintf(stderr,"dumpedfiles are: %s\n",dumpedFiles[i]); free(dumpedFiles[i]); } fprintf(stderr, "\t[ALL done] cpu-time used = %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); fprintf(stderr, "\t[ALL done] walltime used = %.2f sec\n", (float)(time(NULL) - t2)); // print to log file fprintf(flog, "\t[ALL done] cpu-time used = %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); fprintf(flog, "\t[ALL done] walltime used = %.2f sec\n", (float)(time(NULL) - t2)); fclose(flog); gzclose(bo); return 0; }