/**
 * Read a regions file and return its whitespace-separated tokens.
 *
 * The whole file is loaded into memory and split on space, tab, newline and
 * carriage return. Tokens whose first character is '#' are skipped. Every
 * returned entry is a strdup()'ed copy, so the caller is responsible for
 * free()'ing each element.
 *
 * Calls exit(0) when fexists(name) is false. A short read is only reported on
 * stderr; parsing then continues on the bytes that were actually read.
 *
 * @param name path of the regions file
 * @return the accepted tokens, in file order
 */
std::vector<char*> getRegions(const char * name){
  if(!fexists(name)){
    fprintf(stderr,"\t-> Problems opening file: %s\n",name);
    exit(0);
  }
  const char* delims = " \t\n\r";
  std::vector<char*> ret;
  FILE *fp = getFILE(name,"r");

  // Query the size once and keep the contents on the heap: a variable-length
  // array is not standard C++ and overflows the stack for large files.
  const size_t len = fsize(name);
  std::vector<char> buffer(len+1);
  const size_t nread = fread(&buffer[0],1,len,fp);
  if(len!=nread)
    fprintf(stderr,"[%s] Problems reading %lu from: %s\n",__FUNCTION__,static_cast<unsigned long>(len),name);
  // Terminate after the bytes that were really read, so a short read never
  // exposes unread buffer contents to strtok().
  buffer[nread]='\0';

  for(char *tok = strtok(&buffer[0],delims); tok!=NULL; tok = strtok(NULL,delims)){
    if(tok[0]!='#')
      ret.push_back(strdup(tok));
  }

  fprintf(stderr,"\t-> From regionsfile: %s we read %lu\n",name,static_cast<unsigned long>(ret.size()));
  fclose(fp);
  return ret;
}
/**
 * Read whitespace-separated numbers from a file and return their natural logs.
 *
 * The file is read in one go and split on tab, newline and space; every token
 * is converted with atof(). The number of values must equal `hint`, otherwise
 * the values are dumped to stderr and the process exits with exit(0). An empty
 * file yields zero values and is therefore reported through the same size
 * check instead of dereferencing a NULL token.
 *
 * @param fname path of the file to read
 * @param hint  expected number of values
 * @return array of `hint` doubles allocated with new[], holding log(value);
 *         the caller must release it with delete[]
 */
double *readDouble(const char*fname,int hint){
  FILE *fp = getFILE(fname,"r");

  // Heap buffer instead of a variable-length array (non-standard C++ and a
  // stack overflow risk for large files); the size is queried only once.
  const size_t len = fsize(fname);
  std::vector<char> buf(len+1);
  if(len!=fread(&buf[0],sizeof(char),len,fp)){
    fprintf(stderr,"Problems reading file: %s\n will exit\n",fname);
    exit(0);
  }
  fclose(fp);
  buf[len]='\0';

  std::vector<double> res;
  for(char *tok=strtok(&buf[0],"\t\n "); tok!=NULL; tok=strtok(NULL,"\t\n "))
    res.push_back(atof(tok));

  // Compare in a single signedness; a negative hint can never match.
  if(hint<0 || static_cast<size_t>(hint)!=res.size()){
    fprintf(stderr,"problem with size of dimension of prior %d vs %lu\n",hint,static_cast<unsigned long>(res.size()));
    for(size_t i=0;i<res.size();i++)
      fprintf(stderr,"%zu=%f\n",i,res[i]);
    exit(0);
  }

  double *ret = new double[res.size()];
  for(size_t i=0;i<res.size();i++)
    ret[i] = log(res[i]);
  return ret;
}
/** * DATE: 2010-9-15 * FUNCTION: open sfs file * PARAMETER: outfiles : outfile name. writeFr : whether writer frequence file. doBay : whether do Bay. doJoint : whether do Joint * RETURN: OPENSFS_ERROR:if can not open file ,OPENSFS_SUCC : if can open file. */ int Files::OpenSfsfile(const string outfiles, const int writeFr, const int doBay, const int doJoint) { //generete the output filenames if(!outfiles.c_str()) { printf("Must supply -outfiles (-fai)\n"); return OPENSFS_ERROR; } fSFSall = outfiles + ".sfs"; fFreq = outfiles + ".frq"; fJoint = outfiles + ".bjoint"; //open the persite FILE streams if(writeFr) { //freqfile.clear(); //freqfile.open(fFreq.c_str()); freqfile= getFILE(fFreq.c_str(),"w"); } if(doBay) { //sfsfile.clear(); //sfsfile.open(fSFSall.c_str()); sfsfile = getFILE(fSFSall.c_str(),"w"); //sfsfile-open(fSFSall.c_str()); } if(doJoint) { //jointSfsfile.clear(); //jointSfsfile.open(fJoint.c_str(), ios::binary); jointSfsfile = getFILE(fJoint.c_str(),"w"); } if ((writeFr && !freqfile) || (doBay && !sfsfile) || (doJoint && !jointSfsfile)) { cerr << "\topen sfs file failed!" << endl; return OPENSFS_ERROR; } return OPENSFS_SUCC; }
//this function is much to slow on a genome scale should be improved fMap getMap(char *fname,std::map<char*,int,ltstr> *revMap){ const char *delims = "\t\n "; FILE *fp=getFILE(fname,"r"); char buf[LENS]; int nsites=0; fMap ret; std::map<char *,int>::iterator rit; while(fgets(buf,LENS,fp)){ char *chr = strtok(buf,delims); strtok(NULL,delims);//rsnumber strtok(NULL,delims);//centimorgan char *tok = strtok(NULL,delims); if(tok==NULL){ fprintf(stderr,"Problem with fileformat in .bim file\n"); exit(0); } int pos = atoi(tok)-1;//genomic position in bp mm value; value.major = refToInt[strtok(NULL,delims)[0]]; value.minor = refToInt[strtok(NULL,delims)[0]]; //check for N if this exists; if(value.major==4||value.minor==4){ fprintf(stderr,"N extists in major minor defintion\n"); break; } // fprintf(stderr,"chr=%s pos=%d major=%d minor=%d\n",chr,pos,mymm.major,mymm.minor); rit=revMap->find(chr); if(rit==revMap->end()){ fprintf(stderr,"Problem finding chromosome: %s in lookuptable\n",chr); exit(0); } mm key; key.major = rit->second; key.minor = pos; fMap::iterator it = ret.find(key); if(it!=ret.end()){ fprintf(stderr,"duplicate entry in filterlist:%s : will exit offending position below\n",fname); fprintf(stderr,"chr=%s pos=%d major=%d minor=%d\n",chr,pos,value.major,value.minor); exit(0); }else ret.insert(fMap::value_type(key, value)); } fclose(fp); return ret; }
/**
 * Read whitespace-separated numbers from a file and store their exponentials.
 *
 * The file is read in one go and split on tab, newline and space; every token
 * is converted with atof(). The process exits with exit(0) when the read is
 * short, when the file contains no token, or when the number of values
 * differs from `hint`.
 *
 * @param fname path of the file to read
 * @param hint  expected number of values
 * @param ret   output array; receives exp(value) for each of the `hint`
 *              values, so it must have room for at least `hint` doubles
 */
void readSFS(const char*fname,int hint,double *ret){
  fprintf(stderr,"reading: %s\n",fname);
  FILE *fp = getFILE(fname,"r");

  // Heap buffer instead of a variable-length array (non-standard C++ and a
  // stack overflow risk for large files); the size is queried only once.
  const size_t len = fsize(fname);
  std::vector<char> buf(len+1);
  if(len!=fread(&buf[0],sizeof(char),len,fp)){
    fprintf(stderr,"Problems reading file: %s\n will exit\n",fname);
    exit(0);
  }
  fclose(fp);
  buf[len]='\0';

  char *tok = strtok(&buf[0],"\t\n ");
  if(!tok){
    fprintf(stderr,"File:%s looks empty\n",fname);
    exit(0);
  }
  std::vector<double> res;
  for(; tok!=NULL; tok=strtok(NULL,"\t\n "))
    res.push_back(atof(tok));

  // Compare in a single signedness; a negative hint can never match.
  if(hint<0 || static_cast<size_t>(hint)!=res.size()){
    fprintf(stderr,"\t-> Problem with size of dimension of prior %d vs %lu\n",hint,static_cast<unsigned long>(res.size()));
    exit(0);
  }

  for(size_t i=0;i<res.size();i++)
    ret[i] = exp(res[i]);
}
/**
 * Populate the filter settings from the parsed command-line arguments.
 *
 * Reads -filter, -doMajorMinor and -minInd through angsd::getArg(). When a
 * filter file name is present its type is determined with findType()
 * (1=bim, 2=keep). A type-1 file is loaded into `fm` via getMap(); a type-2
 * file is only opened into `fp`. The process exits with exit(0) when
 * -doMajorMinor 3 is requested without a type-1 filter.
 *
 * @param arguments parsed command-line arguments handed to angsd::getArg()
 */
void filter::getOptions(argStruct *arguments){
  fname = angsd::getArg("-filter",fname,arguments);
  if(fname!=NULL){
    // 1=bim 2=keep
    doFilter = findType(fname);
  }

  doMajorMinor = angsd::getArg("-doMajorMinor",doMajorMinor,arguments);
  const bool haveBim = (doFilter==1);
  if(!haveBim && doMajorMinor==3){
    fprintf(stderr,"Must supply -filter with .bim file if -doMajorMinor 3\n");
    exit(0);
  }

  switch(doFilter){
  case 1:
    fm = getMap(fname,revMap);
    fprintf(stderr,"\t-> number of sites in filter: %lu\n",fm.size());
    break;
  case 2:
    // the file is only opened here; the readSites() call is disabled
    fp = getFILE(fname,"r");
    fprintf(stderr,"Filtering with .keep is still beta\n");
    break;
  default:
    break;
  }

  minInd = angsd::getArg("-minInd",minInd,arguments);
}
int main (int argc, char *argv[]) { if (argc==1) { info(); return 0; } /// DECLARE AND INITIALIZE VARIABLES char *sfsfile1=NULL; // posterior probabilities char *sfsfile2=NULL; char *fstfile=NULL; // first guess of fst char *priorfile1=NULL; // priors (needed for weighting function only) char *priorfile2=NULL; char *priorfile12=NULL; // joint prior, it is 2D-SFS FILE *outpost; char *outfile=NULL; char *foutpost=NULL; int argPos = 1, increment = 0, nind = 0, nind1 = 0, nind2 = 0, nsites = 0, verbose = 0, nsums = 1, block_size = 10000, K=0, isfold=0, firstbase=0; /// READ AND ASSIGN INPUT PARAMETERS while (argPos<argc) { increment = 0; if(strcmp(argv[argPos],"-postfiles")==0) { sfsfile1 = argv[argPos+1]; sfsfile2 = argv[argPos+2]; increment = 1; } else if(strcmp(argv[argPos],"-fstfile")==0) { fstfile = argv[argPos+1]; } else if(strcmp(argv[argPos],"-priorfile")==0) { priorfile12 = argv[argPos+1]; } else if(strcmp(argv[argPos],"-priorfiles")==0) { priorfile1 = argv[argPos+1]; priorfile2 = argv[argPos+2]; increment = 1; } else if(strcmp(argv[argPos],"-outfile")==0) outfile = argv[argPos+1]; else if(strcmp(argv[argPos],"-nind")==0) { nind1 = atoi(argv[argPos+1]); nind2 = atoi(argv[argPos+2]); nind = nind1 + nind2; increment = 1; } else if(strcmp(argv[argPos],"-nsites")==0) nsites = atoi(argv[argPos+1]); else if(strcmp(argv[argPos],"-K")==0) K = atof(argv[argPos+1]); else if(strcmp(argv[argPos],"-verbose")==0) verbose = atoi(argv[argPos+1]); else if(strcmp(argv[argPos],"-block_size")==0) block_size = atoi(argv[argPos+1]); else if(strcmp(argv[argPos],"-nsums")==0) nsums = atoi(argv[argPos+1]); else if(strcmp(argv[argPos],"-firstbase")==0) firstbase = atoi(argv[argPos+1]); else if(strcmp(argv[argPos],"-isfold")==0) isfold = atoi(argv[argPos+1]); else { printf("\tUnknown arguments: %s\n",argv[argPos]); info(); return 0; // terminate } argPos = argPos + 2 + increment; } /// CHECK INPUT if((sfsfile1 == NULL) & (sfsfile2 == NULL) ) { fprintf(stderr,"\nMust supply 
-postfiles.\n"); info(); return 0; } if(outfile == NULL) { fprintf(stderr,"\nMust supply -outfile.\n"); info(); return 0; } if((priorfile1 == NULL) & (fstfile==NULL) & (K==0)) { fprintf(stderr,"\nPerhaps you forgot to supply -priofiles and -fstfile when using an automatic setting of lambda?\n"); } if((priorfile1 != NULL) & (priorfile12!=NULL)) { fprintf(stderr,"\nYou should give either -priorfiles or -priorfile, otherwise I don't know if you want to use a 2D-SFS or the corrected product of marginal spectra as prior\n"); info(); return 0; } if((fstfile != NULL) & (priorfile12!=NULL)) { fprintf(stderr,"\nIf you give -fstfile I assume you want to use the correction for marginal spectra. So why are you giving -priorfile too? You should only give -priorfiles eventually if K=0.\n"); info(); return 0; } if((isfold) & (priorfile12!=NULL)) { fprintf(stderr,"\nSorry. Handling the folded 2D-SFS has not been implemented yet. Please contribute or push a request. Currently -ifold 1 and -priorfile12 !NULL are not compatible.\n"); info(); return 0; } /// OUTPUT foutpost = append(outfile, ""); fprintf(stderr,"\t->Dumping file: %s\n", foutpost); outpost = getFILE(foutpost, "w"); // print input arguments // UPDATE THIS !!! 
fprintf(stderr,"\t->Using some of these args: -nind %d -nind1 %d -nind2 %d -nsites %d -postfiles %s %s -priorfiles %s %s -priorfile %s -fstfile %s -outfile %s -verbose %d -nsums %d -offset %d -K %d\n", nind, nind1, nind2, nsites, sfsfile1, sfsfile2, priorfile1, priorfile2, priorfile12, fstfile, foutpost, verbose, nsums, firstbase, K); // READ PRIORS (if provided) // marginal spectra array<double> prior1; array<double> prior2; if (priorfile1 != NULL) { if (verbose==1) fprintf(stderr, "\nAdding priors..."); prior1 = readArray(priorfile1, nind1, isfold); prior2 = readArray(priorfile2, nind2, isfold); } // 2D-SFS matrix<double> prior12; if ((priorfile12==NULL)==0) { if (verbose==1) fprintf(stderr, "\nAdding 2D prior..."); prior12 = readPrior12(priorfile12, nind1*2+1, nind2*2+1); if (verbose==2) { fprintf(stderr, "\nPrior 2d:\n"); writematrix(prior12, stderr); } //// the difference with this prior is that I don't add the prior, but I add the prior directly at computeFST step } /// GET POSITIONS OF BLOCKS if (block_size>(nsites-firstbase)) block_size=(nsites-firstbase); array<int> start; array<int> end; start=getStart(nsites, firstbase, block_size); end=getEnd(nsites, firstbase, block_size); /// ITERATE OVER EACH BLOCK int nwin = start.x; for (int n=0; n<nwin; n++) { fprintf(stderr, "Block %d out of %d from %d to %d\n", n, (nwin-1), start.data[n], end.data[n]); // READ POSTERIOR PROBABILITIES FILES matrix<double> post1; matrix<double> post2; post1 = readFileSub(sfsfile1, nind1, start.data[n], end.data[n], isfold); post2 = readFileSub(sfsfile2, nind2, start.data[n], end.data[n], isfold); if (priorfile12!=NULL) { // if from -realSFS 1 they are -log normSFS(post1, 1); // 2nd argument is islog normSFS(post2, 1); } // IF NOT FST FILE PROVIDED if ((fstfile == NULL)) { if (verbose==1) fprintf(stderr,"Computing FST and no first guess provided.\n"); if (priorfile12==NULL) { if (isfold) { computeVarReyFold(post1, post2, verbose, outpost, nsums); } else { computeVarRey(post1, 
post2, verbose, outpost, nsums); } } else { if (verbose==1) fprintf(stderr,"Using 2D-SFS as prior. You didn't run sfstools, right??? Use only -realSFS 1.\n"); computeVarRey12New(post1, post2, verbose, outpost, nsums, prior12); } } else { // IF FST FILE IS INDEED PROVIDED if (verbose==1) fprintf(stderr,"Computing FST and first guess provided.\n"); array<double> firstfst; firstfst=readFSTsub(fstfile, nsites, start.data[n], end.data[n]); array <double> sublam; sublam = getLambdas(firstfst, prior1, prior2, K, verbose, isfold); if (verbose==1) fprintf(stderr,"Computed lambdas.\n"); if (isfold) { computeVarRey2Fold(post1, post2, verbose, outpost, nsums, sublam); } else { computeVarRey2(post1, post2, verbose, outpost, nsums, sublam); } delete [] firstfst.data; delete [] sublam.data; } cleanup(post1); cleanup(post2); } // end blocks iterations delete [] start.data; delete [] end.data; free(foutpost); return 0; } // end main