//chr start stop is given from commandine myMap::iterator iter_init(perpsmc *pp,char *chr,int start,int stop){ assert(chr!=NULL); myMap::iterator it = pp->mm.find(chr); if(it==pp->mm.end()){ fprintf(stderr,"\t-> [%s] Problem finding chr: %s\n",__FUNCTION__,chr); return it; } my_bgzf_seek(pp->bgzf_gls,it->second.saf,SEEK_SET); my_bgzf_seek(pp->bgzf_pos,it->second.pos,SEEK_SET); //fprintf(stderr,"pp->gls:%p\n",pp->gls); if(pp->pos) delete [] pp->pos; if(pp->gls) delete [] pp->gls; pp->pos = new int[it->second.nSites]; my_bgzf_read(pp->bgzf_pos,pp->pos,sizeof(int)*it->second.nSites); pp->gls = new double[2*it->second.nSites];//<-valgrind complains about large somthing my_bgzf_read(pp->bgzf_gls,pp->gls,2*sizeof(double)*it->second.nSites); // fprintf(stderr," end: %f %f\n",pp->gls[0],pp->gls[1]); pp->first=0; if(start!=-1) while(pp->first<it->second.nSites&&pp->pos[pp->first]<start) pp->first++; pp->last = it->second.nSites; if(stop!=-1&&stop<=pp->pos[pp->last-1]){ pp->last=pp->first; while(pp->pos[pp->last]<stop) pp->last++; } return it; }
perpsmc * perpsmc_init(char *fname){ perpsmc *ret = new perpsmc ; ret->fname = strdup(fname); ret->gls =NULL; ret->pos = NULL; ret->bgzf_pos=ret->bgzf_gls=NULL; ret->pos = NULL; size_t clen; if(!fexists(fname)){ fprintf(stderr,"\t-> Problem opening file: \'%s\'\n",fname); exit(0); } FILE *fp = NULL; fp=fopen(fname,"r"); if(fp==NULL){ fprintf(stderr,"\t-> Problem opening file:%s\n",fname); exit(0); } char buf[8]; assert(fread(buf,1,8,fp)==8); ret->version = psmcversion(fname); fprintf(stderr,"\t-> Version of fname: \'%s\' is:%d\n",fname,ret->version); if(ret->version!=1){ fprintf(stderr,"\t-> Looks like you are trying to use a version of PSMC that does not exists\n"); exit(0); } ret->nSites =0; while(fread(&clen,sizeof(size_t),1,fp)){ char *chr = (char*)malloc(clen+1); assert(clen==fread(chr,1,clen,fp)); chr[clen] = '\0'; datum d; if(1!=fread(&d.nSites,sizeof(size_t),1,fp)){ fprintf(stderr,"[%s.%s():%d] Problem reading data: %s \n",__FILE__,__FUNCTION__,__LINE__,fname); exit(0); } ret->nSites += d.nSites; if(1!=fread(&d.pos,sizeof(int64_t),1,fp)){ fprintf(stderr,"[%s->%s():%d] Problem reading data: %s \n",__FILE__,__FUNCTION__,__LINE__,fname); exit(0); } if(1!=fread(&d.saf,sizeof(int64_t),1,fp)){ fprintf(stderr,"[%s->%s():%d] Problem reading data: %s \n",__FILE__,__FUNCTION__,__LINE__,fname); exit(0); } myMap::iterator it = ret->mm.find(chr); if(it==ret->mm.end()) ret->mm[chr] =d ; else{ fprintf(stderr,"Problem with chr: %s, key already exists, psmc file needs to be sorted. (sort your -rf that you used for input)\n",chr); exit(0); } } fclose(fp); char *tmp =(char*)calloc(strlen(fname)+100,1);//that should do it tmp=strncpy(tmp,fname,strlen(fname)-3); // fprintf(stderr,"tmp:%s\n",tmp); char *tmp2 = (char*)calloc(strlen(fname)+100,1);//that should do it snprintf(tmp2,strlen(fname)+100,"%sgz",tmp); fprintf(stderr,"\t-> Assuming .psmc.gz file: %s\n",tmp2); ret->bgzf_gls = bgzf_open(tmp2,"r"); if(ret->bgzf_gls) my_bgzf_seek(ret->bgzf_gls,8,SEEK_SET); if(ret->bgzf_gls && ret->version!=psmcversion(tmp2)){ fprintf(stderr,"\t-> Problem with mismatch of version of %s vs %s %d vs %d\n",fname,tmp2,ret->version,psmcversion(tmp2)); exit(0); } snprintf(tmp2,strlen(fname)+100,"%spos.gz",tmp); fprintf(stderr,"\t-> Assuming .psmc.pos.gz: %s\n",tmp2); ret->bgzf_pos = bgzf_open(tmp2,"r"); if(ret->pos) my_bgzf_seek(ret->bgzf_pos,8,SEEK_SET); if(ret->bgzf_pos&& ret->version!=psmcversion(tmp2)){ fprintf(stderr,"Problem with mismatch of version of %s vs %s\n",fname,tmp2); exit(0); } //assert(ret->pos!=NULL&&ret->saf!=NULL); free(tmp);free(tmp2); return ret; }
int fst_stat(int argc,char **argv){ char *bname = *argv; fprintf(stderr,"\t-> Assuming idxname:%s\n",bname); perfst *pf = perfst_init(bname); args *pars = getArgs(--argc,++argv); int *ppos = NULL; int chs = choose((int)pf->names.size(),2); // fprintf(stderr,"choose:%d \n",chs); double **ares = new double*[chs]; double **bres = new double*[chs]; double unweight[chs]; double wa[chs]; double wb[chs]; size_t nObs[chs]; for(int i=0;i<chs;i++){ unweight[i] = wa[i] = wb[i] =0.0; nObs[i] = 0; } for(myFstMap::iterator it=pf->mm.begin();it!=pf->mm.end();++it){ if(pars->chooseChr!=NULL){ it = pf->mm.find(pars->chooseChr); if(it==pf->mm.end()){ fprintf(stderr,"Problem finding chr: %s\n",pars->chooseChr); break; } } if(it->second.nSites==0) continue; my_bgzf_seek(pf->fp,it->second.off,SEEK_SET); ppos = new int[it->second.nSites]; my_bgzf_read(pf->fp,ppos,sizeof(int)*it->second.nSites); for(int i=0;i<choose((int)pf->names.size(),2);i++){ ares[i] = new double[it->second.nSites]; bres[i] = new double[it->second.nSites]; my_bgzf_read(pf->fp,ares[i],sizeof(double)*it->second.nSites); my_bgzf_read(pf->fp,bres[i],sizeof(double)*it->second.nSites); } size_t first=0; if(pars->start!=-1) while(ppos[first]<pars->start) first++; size_t last=it->second.nSites; if(pars->stop!=-1&&pars->stop<=ppos[last-1]){ last=first; while(ppos[last]<pars->stop) last++; } // fprintf(stderr,"pars->stop:%d ppos:%d first:%d last:%d\n",pars->stop,ppos[last-1],first,last); for(size_t s=first;s<last;s++){ #if 0 fprintf(stdout,"%s\t%d",it->first,ppos[s]+1); for(int i=0;i<choose(pf->names.size(),2);i++) fprintf(stdout,"\t%f\t%f",ares[i][s],bres[i][s]); fprintf(stdout,"\n"); #endif for(int i=0;i<choose((int)pf->names.size(),2);i++){ if(bres[i][s]!=0){ unweight[i] += ares[i][s]/bres[i][s]; nObs[i]++; } wa[i] += ares[i][s]; wb[i] += bres[i][s]; } } for(int i=0;i<choose((int)pf->names.size(),2);i++){ delete [] ares[i]; delete [] bres[i]; } delete [] ppos; if(pars->chooseChr!=NULL) break; } double fstUW[chs]; double fstW[chs]; for(int i=0;i<chs;i++){ fstUW[i] = unweight[i]/(1.0*((double)nObs[i])); fstW[i] = wa[i]/wb[i]; fprintf(stderr,"\t-> FST.Unweight[nObs:%lu]:%f Fst.Weight:%f\n",nObs[i],fstUW[i],fstW[i]); fprintf(stdout,"%f %f\n",fstUW[i],fstW[i]); } if(chs==3){ //if chr==3 then we have 3pops and we will also calculate pbs statistics calcpbs(fstW);//<- NOTE: the pbs values will replace the fstW values for(int i=0;i<3;i++) fprintf(stderr,"\t-> pbs.pop%d\t%f\n",i+1,fstW[i]); } delete [] ares; delete [] bres; destroy_args(pars); perfst_destroy(pf); return 0; }
int fst_print(int argc,char **argv){ char *bname = *argv; fprintf(stderr,"\t-> Assuming idxname:%s\n",bname); perfst *pf = perfst_init(bname); writefst_header(stderr,pf); args *pars = getArgs(--argc,++argv); int *ppos = NULL; fprintf(stderr,"choose:%d \n",choose((int)pf->names.size(),2)); double **ares = new double*[choose((int)pf->names.size(),2)]; double **bres = new double*[choose((int)pf->names.size(),2)]; for(myFstMap::iterator it=pf->mm.begin();it!=pf->mm.end();++it){ if(pars->chooseChr!=NULL){ it = pf->mm.find(pars->chooseChr); if(it==pf->mm.end()){ fprintf(stderr,"Problem finding chr: %s\n",pars->chooseChr); break; } } if(it->second.nSites==0) continue; my_bgzf_seek(pf->fp,it->second.off,SEEK_SET); ppos = new int[it->second.nSites]; my_bgzf_read(pf->fp,ppos,sizeof(int)*it->second.nSites); for(int i=0;i<choose((int)pf->names.size(),2);i++){ ares[i] = new double[it->second.nSites]; bres[i] = new double[it->second.nSites]; my_bgzf_read(pf->fp,ares[i],sizeof(double)*it->second.nSites); my_bgzf_read(pf->fp,bres[i],sizeof(double)*it->second.nSites); } size_t first=0; if(pars->start!=-1) while(ppos[first]<pars->start) first++; size_t last=it->second.nSites; if(pars->stop!=-1&&pars->stop<=ppos[last-1]){ last=first; while(ppos[last]<pars->stop) last++; } fprintf(stderr,"pars->stop:%d ppos:%d first:%lu last:%lu\n",pars->stop,ppos[last-1],first,last); for(size_t s=first;s<last;s++){ fprintf(stdout,"%s\t%d",it->first,ppos[s]+1); for(int i=0;i<choose((int)pf->names.size(),2);i++) fprintf(stdout,"\t%f\t%f",ares[i][s],bres[i][s]); fprintf(stdout,"\n"); } for(int i=0;i<choose((int)pf->names.size(),2);i++){ delete [] ares[i]; delete [] bres[i]; } delete [] ppos; if(pars->chooseChr!=NULL) break; } delete [] ares; delete [] bres; destroy_args(pars); perfst_destroy(pf); return 0; }
int fst_stat2(int argc,char **argv){ int pS,pE;//physical start,physical end int begI,endI;//position in array for pS, pE; char *bname = *argv; fprintf(stderr,"\t-> Assuming idxname:%s\n",bname); perfst *pf = perfst_init(bname); args *pars = getArgs(--argc,++argv); fprintf(stderr,"win:%d step:%d\n",pars->win,pars->step); int *ppos = NULL; int chs = choose((int)pf->names.size(),2); // fprintf(stderr,"choose:%d \n",chs); double **ares = new double*[chs]; double **bres = new double*[chs]; double unweight[chs]; double wa[chs]; double wb[chs]; size_t nObs =0; //print header fprintf(stdout,"region\tchr\tmidPos\tNsites"); for(int c1=0;c1<chs-1;c1++) for(int c2=c1+1;c2<chs;c2++) fprintf(stdout,"\tFst%d%d",c1,c2); if(chs==3) fprintf(stdout,"\tPBS0\tPBS1\tPBS2"); fprintf(stdout,"\n"); for(myFstMap::iterator it=pf->mm.begin();it!=pf->mm.end();++it){ if(pars->chooseChr!=NULL){ it = pf->mm.find(pars->chooseChr); if(it==pf->mm.end()){ fprintf(stderr,"Problem finding chr: %s\n",pars->chooseChr); break; } } fprintf(stderr,"nSites:%lu\n",it->second.nSites); if(it->second.nSites==0&&pars->chooseChr!=NULL) break; else if(it->second.nSites==0&&pars->chooseChr==NULL) continue; my_bgzf_seek(pf->fp,it->second.off,SEEK_SET); ppos = new int[it->second.nSites]; my_bgzf_read(pf->fp,ppos,sizeof(int)*it->second.nSites); for(int i=0;i<it->second.nSites;i++)//what? why? dragon! ppos[i]++; for(int i=0;i<choose((int)pf->names.size(),2);i++){ ares[i] = new double[it->second.nSites]; bres[i] = new double[it->second.nSites]; my_bgzf_read(pf->fp,ares[i],sizeof(double)*it->second.nSites); my_bgzf_read(pf->fp,bres[i],sizeof(double)*it->second.nSites); } if(pars->type==0) pS = ((pars->start!=-1?pars->start:ppos[0])/pars->step)*pars->step +pars->step; else if(pars->type==1) pS = (pars->start!=-1?pars->start:ppos[0]); else if(pars->type==2) pS = 1; pE = pS+pars->win; begI=endI=0; //fprintf(stderr,"ps:%d\n",pS);exit(0); if(pE>(pars->stop!=-1?pars->stop:ppos[it->second.nSites-1])){ fprintf(stderr,"end of dataset is before end of window: end of window:%d last position in chr:%d\n",pE,ppos[it->second.nSites-1]); // return str; } while(ppos[begI]<pS) begI++; endI=begI; while(endI<it->second.nSites &&ppos[endI]<pE) endI++; //fprintf(stderr,"begI:%d endI:%d\n",begI,endI); while(1){ for(int i=0;i<chs;i++) unweight[i] = wa[i] = wb[i] =0.0; nObs=0; for(int s=begI;s<endI;s++){ #if 0 fprintf(stdout,"%s\t%d",it->first,ppos[s]+1); for(int i=0;i<choose(pf->names.size(),2);i++) fprintf(stdout,"\t%f\t%f",ares[i][s],bres[i][s]); fprintf(stdout,"\n"); #endif for(int i=0;i<choose((int)pf->names.size(),2);i++){ unweight[i] += ares[i][s]/bres[i][s]; wa[i] += ares[i][s]; wb[i] += bres[i][s]; } nObs++; } if(nObs>0) fprintf(stdout,"(%d,%d)(%d,%d)(%d,%d)\t%s\t%d\t%d",begI,endI-1,ppos[begI],ppos[endI-1],pS,pE,it->first,pS+(pE-pS)/2,endI-begI+1); double fstW[chs]; for(int i=0;nObs>0&&i<chs;i++){ fstW[i] = wa[i]/wb[i]; // fprintf(stdout,"\t%f\t%f",unweight[i]/(1.0*nObs),fstW[i]); fprintf(stdout,"\t%f",fstW[i]); } if(nObs>0&&chs==3){ //if chr==3 then we have 3pops and we will also calculate pbs statistics calcpbs(fstW);//<- NOTE: the pbs values will replace the fstW values for(int i=0;i<3;i++) fprintf(stdout,"\t%f",fstW[i]); } if(nObs>0) fprintf(stdout,"\n"); pS += pars->step; pE =pS+pars->win; if(pE>(pars->stop!=-1?pars->stop:ppos[it->second.nSites-1])) break; while(ppos[begI]<pS) begI++; while(ppos[endI]<pE) endI++; } for(int i=0;i<choose((int)pf->names.size(),2);i++){ delete [] ares[i]; delete [] bres[i]; } delete [] ppos; if(pars->chooseChr!=NULL) break; } delete [] ares; delete [] bres; destroy_args(pars); perfst_destroy(pf); return 0; }