Example #1
0
/**
 * Read a regions file and return its whitespace-separated tokens.
 *
 * The whole file is loaded into memory and split on space, tab, newline and
 * carriage return. Tokens whose first character is '#' are skipped.
 *
 * @param name  path of the regions file
 * @return one strdup()'ed C string per kept token; the caller owns them and
 *         must free() each one.
 *
 * Prints a message and calls exit(0) when fexists() reports the file is
 * missing. A short read is only reported on stderr; whatever was read is
 * still tokenised.
 */
std::vector<char*> getRegions(const char * name){
  if(!fexists(name)){
    fprintf(stderr,"\t-> Problems opening file: %s\n",name);
    exit(0);
  }
  const char* delims = " \t\n\r";
  std::vector<char*> ret;
  FILE *fp =getFILE(name,"r");

  // Query the size once and keep the contents on the heap: a file-sized
  // stack array is not standard C++ and overflows the stack for big files.
  // The buffer is zero-filled, so a short read leaves a terminated string
  // instead of uninitialised bytes.
  const size_t len = fsize(name);
  std::vector<char> buffer(len+1,'\0');
  const size_t nread = fread(&buffer[0],1,len,fp);
  if(len!=nread)
    fprintf(stderr,"[%s] Problems reading %lu from: %s\n",__FUNCTION__,static_cast<unsigned long>(len),name);
  fclose(fp);

  for(char *tok = strtok(&buffer[0],delims); tok!=NULL; tok = strtok(NULL,delims)){
    if(tok[0]!='#'){
      ret.push_back(strdup(tok));
    }
  }

  fprintf(stderr,"\t-> From regionsfile: %s we read %lu\n",name,static_cast<unsigned long>(ret.size()));
  return ret;
}
Example #2
0
/**
 * Read whitespace-separated numbers from a text file and return their
 * natural logarithms.
 *
 * @param fname  path of the file to read
 * @param hint   number of values the file is expected to contain
 * @return a new[]-allocated array of res.size() doubles holding log(value)
 *         for each value read; the caller must delete[] it.
 *
 * Calls exit(0) after printing a message when the file cannot be read
 * completely or when the number of values differs from `hint` (an empty file
 * therefore fails the size check unless `hint` is 0).
 */
double *readDouble(const char*fname,int hint){
  FILE *fp = getFILE(fname,"r");

  // Heap buffer sized from a single fsize() call; a file-sized stack array
  // is not standard C++ and can overflow the stack.
  const size_t len = fsize(fname);
  std::vector<char> buf(len+1,'\0');
  if(len!=fread(&buf[0],sizeof(char),len,fp)){
    fprintf(stderr,"Problems reading file: %s\n will exit\n",fname);
    exit(0);
  }
  fclose(fp);

  // One loop for every token, so a file without any token no longer feeds
  // a NULL pointer into atof().
  std::vector<double> res;
  for(char *tok=strtok(&buf[0],"\t\n "); tok!=NULL; tok=strtok(NULL,"\t\n "))
    res.push_back(atof(tok));

  if(static_cast<size_t>(hint)!=res.size()){
    fprintf(stderr,"problem with size of dimension of prior %d vs %lu\n",hint,static_cast<unsigned long>(res.size()));
    for(size_t i=0;i<res.size();i++)
      fprintf(stderr,"%lu=%f\n",static_cast<unsigned long>(i),res[i]);
    exit(0);
  }

  double *ret = new double[res.size()];
  for(size_t i=0;i<res.size();i++)
    ret[i] = log(res[i]);
  return ret;
}
Example #3
0
/**
 * DATE: 2010-9-15
 * FUNCTION: open sfs file
 * PARAMETER: outfiles : outfile name. 
			  writeFr : whether writer frequence file.
			  doBay : whether do Bay.
			  doJoint : whether do Joint
 * RETURN:	 OPENSFS_ERROR:if can not open file  ,OPENSFS_SUCC : if can open file.
 */
int Files::OpenSfsfile(const string outfiles, const int writeFr, const int doBay, const int doJoint)
{
	 //generete the output filenames
	if(!outfiles.c_str())
	{
		printf("Must supply -outfiles (-fai)\n");
		return OPENSFS_ERROR;
	}
	 
	fSFSall = outfiles + ".sfs";
	fFreq = outfiles + ".frq";
	fJoint = outfiles + ".bjoint";
 
    //open the persite FILE streams
	if(writeFr)
	{
		//freqfile.clear();
		//freqfile.open(fFreq.c_str());
		freqfile= getFILE(fFreq.c_str(),"w");
	}
	if(doBay)
	{
		//sfsfile.clear();
		//sfsfile.open(fSFSall.c_str());
		sfsfile = getFILE(fSFSall.c_str(),"w");
		//sfsfile-open(fSFSall.c_str());
	}
	if(doJoint)
	{
		//jointSfsfile.clear();
		//jointSfsfile.open(fJoint.c_str(), ios::binary);
		jointSfsfile = getFILE(fJoint.c_str(),"w");
	}
	if ((writeFr && !freqfile) || (doBay && !sfsfile) || (doJoint && !jointSfsfile))
	{
		cerr << "\topen sfs file failed!" << endl;
		return OPENSFS_ERROR;
	}
	return OPENSFS_SUCC;

}
Example #4
0
//this function is much too slow on a genome scale and should be improved
/**
 * Parse a .bim-style filter file into a map keyed by (chromosome id, position).
 *
 * Every line is split on tab, newline and space into six columns:
 * chromosome, rs number, centimorgan, position, major allele, minor allele.
 * The chromosome name is translated through `revMap`; the stored position is
 * the file position minus one. The first character of each allele column is
 * translated through refToInt.
 *
 * @param fname   path of the filter file
 * @param revMap  lookup table from chromosome name to integer id
 * @return map from key {major = chromosome id, minor = position} to value
 *         {major, minor} allele codes.
 *
 * Calls exit(0) after printing a message on a line with fewer than six
 * columns, an unknown chromosome or a duplicated key. Reading stops (and the
 * entries collected so far are returned) at the first line whose major or
 * minor allele code is 4.
 */
fMap getMap(char *fname,std::map<char*,int,ltstr> *revMap){
  const char *delims = "\t\n ";
  FILE *fp=getFILE(fname,"r");

  char buf[LENS];
  fMap ret;
  while(fgets(buf,LENS,fp)){
    char *chr = strtok(buf,delims);
    strtok(NULL,delims);//rsnumber
    strtok(NULL,delims);//centimorgan
    char *tok = strtok(NULL,delims);
    char *majorTok = strtok(NULL,delims);
    char *minorTok = strtok(NULL,delims);
    // Once strtok() runs out of tokens it keeps returning NULL, so checking
    // all three columns here also covers lines that end even earlier.
    if(tok==NULL||majorTok==NULL||minorTok==NULL){
      fprintf(stderr,"Problem with fileformat in .bim file\n");
      exit(0);
    }
    int pos = atoi(tok)-1;//genomic position in bp
    mm value;
    value.major = refToInt[majorTok[0]];
    value.minor = refToInt[minorTok[0]];

    //check for N if this exists;
    if(value.major==4||value.minor==4){
      fprintf(stderr,"N extists in major minor defintion\n");
      break;
    }

    // iterator type must match the map's comparator (ltstr)
    std::map<char*,int,ltstr>::iterator rit=revMap->find(chr);
    if(rit==revMap->end()){
      fprintf(stderr,"Problem finding chromosome: %s in lookuptable\n",chr);
      exit(0);
    }

    mm key;
    key.major = rit->second;
    key.minor = pos;
    fMap::iterator it = ret.find(key);
    if(it!=ret.end()){
      fprintf(stderr,"duplicate entry in filterlist:%s : will exit offending position below\n",fname);
      fprintf(stderr,"chr=%s pos=%d major=%d minor=%d\n",chr,pos,value.major,value.minor);
      exit(0);
    }else
      ret.insert(fMap::value_type(key, value));
  }
  fclose(fp);
  return ret;
}
Example #5
0
/**
 * Read whitespace-separated numbers from a text file and store their
 * exponentials in a caller-supplied array.
 *
 * @param fname  path of the file to read
 * @param hint   number of values the file is expected to contain
 * @param ret    output array; receives exp(value) for each value read, so it
 *               must have room for `hint` doubles
 *
 * Calls exit(0) after printing a message when the file cannot be read
 * completely, contains no token, or holds a number of values different from
 * `hint`.
 */
void readSFS(const char*fname,int hint,double *ret){
  fprintf(stderr,"reading: %s\n",fname);
  FILE *fp = getFILE(fname,"r");

  // Heap buffer sized from a single fsize() call; a file-sized stack array
  // is not standard C++ and can overflow the stack.
  const size_t len = fsize(fname);
  std::vector<char> buf(len+1,'\0');
  if(len!=fread(&buf[0],sizeof(char),len,fp)){
    fprintf(stderr,"Problems reading file: %s\n will exit\n",fname);
    exit(0);
  }
  fclose(fp);

  char *tok = strtok(&buf[0],"\t\n ");
  if(!tok){
    fprintf(stderr,"File:%s looks empty\n",fname);
    exit(0);
  }

  std::vector<double> res;
  for(; tok!=NULL; tok=strtok(NULL,"\t\n "))
    res.push_back(atof(tok));

  if(static_cast<size_t>(hint)!=res.size()){
    fprintf(stderr,"\t-> Problem with size of dimension of prior %d vs %lu\n",hint,static_cast<unsigned long>(res.size()));
    exit(0);
  }

  for(size_t i=0;i<res.size();i++)
    ret[i] = exp(res[i]);
}
Example #6
0
/**
 * Read the filter-related settings from the parsed command line.
 *
 * Fetches -filter, -doMajorMinor and -minInd through angsd::getArg. When a
 * filter file name is present its type is determined with findType()
 * (1=bim, 2=keep). A bim filter is loaded into `fm` with getMap(); a keep
 * filter is only opened into `fp`.
 *
 * Calls exit(0) when -doMajorMinor is 3 but the filter is not a .bim file.
 *
 * @param arguments  parsed command-line arguments handed to angsd::getArg
 */
void filter::getOptions(argStruct *arguments){
  fname = angsd::getArg("-filter",fname,arguments);
  if(NULL!=fname){
    //1=bim 2=keep
    doFilter = findType(fname);
  }

  doMajorMinor = angsd::getArg("-doMajorMinor",doMajorMinor,arguments);
  if(doMajorMinor==3){
    if(doFilter!=1){
      fprintf(stderr,"Must supply -filter with .bim file if -doMajorMinor 3\n");
      exit(0);
    }
  }

  switch(doFilter){
  case 1:
    // bim file: build the site lookup map
    fm = getMap(fname,revMap);
    fprintf(stderr,"\t-> number of sites in filter: %lu\n",fm.size());
    break;
  case 2:
    // keep file: only opened here, the sites are not read yet
    fp = getFILE(fname,"r");
    fprintf(stderr,"Filtering with .keep is still beta\n");
    break;
  default:
    break;
  }

  minInd = angsd::getArg("-minInd",minInd,arguments);
}
Example #7
0
/// Number of values that must follow the command-line option `opt`:
/// 2 for the paired options, 1 for every other recognised option and
/// -1 when `opt` is not recognised.
static int optionValueCount(const char *opt) {
  static const char *const paired[] = {"-postfiles", "-priorfiles", "-nind"};
  static const char *const single[] = {"-fstfile", "-priorfile", "-outfile", "-nsites", "-K",
                                       "-verbose", "-block_size", "-nsums", "-firstbase", "-isfold"};
  for (size_t i = 0; i < sizeof(paired)/sizeof(paired[0]); i++)
    if (strcmp(opt, paired[i]) == 0) return 2;
  for (size_t i = 0; i < sizeof(single)/sizeof(single[0]); i++)
    if (strcmp(opt, single[i]) == 0) return 1;
  return -1;
}

/// Passing a null pointer to a "%s" conversion is undefined behaviour;
/// substitute a visible placeholder for options that were not supplied.
static const char *printableArg(const char *s) {
  return s ? s : "(null)";
}

/**
 * Program entry point.
 *
 * Parses the command line, validates the option combination, opens the
 * output file, optionally reads the priors, and then processes the sites in
 * blocks of `block_size`: for each block the two posterior files are read
 * and one of the computeVarRey* routines writes its result to the output
 * file. Always returns 0, also after a usage error.
 */
int main (int argc, char *argv[]) {

  if (argc==1) {
    info();
    return 0;
  }

  /// DECLARE AND INITIALIZE VARIABLES

  char *sfsfile1=NULL; // posterior probabilities
  char *sfsfile2=NULL;
  char *fstfile=NULL; // first guess of fst
  char *priorfile1=NULL; // priors (needed for weighting function only)
  char *priorfile2=NULL;
  char *priorfile12=NULL; // joint prior, it is 2D-SFS

  FILE *outpost;
  char *outfile=NULL;
  char *foutpost=NULL;

  int argPos = 1, nind = 0, nind1 = 0, nind2 = 0, nsites = 0, verbose = 0, nsums = 1, block_size = 10000, K=0, isfold=0, firstbase=0;

  /// READ AND ASSIGN INPUT PARAMETERS

  while (argPos<argc) {
    const char *opt = argv[argPos];
    const int nvalues = optionValueCount(opt);

    // A recognised option at the end of the command line would otherwise
    // read its value(s) from argv[argc] (NULL) or beyond.
    if (nvalues>0 && argPos+nvalues>=argc) {
      fprintf(stderr,"\tMissing value(s) for argument: %s\n",opt);
      info();
      return 0;
    }

    if(strcmp(opt,"-postfiles")==0) {
      sfsfile1 = argv[argPos+1];
      sfsfile2 = argv[argPos+2];
    }
    else if(strcmp(opt,"-fstfile")==0) {
      fstfile = argv[argPos+1];
    }
    else if(strcmp(opt,"-priorfile")==0) {
      priorfile12 = argv[argPos+1];
    }
    else if(strcmp(opt,"-priorfiles")==0) {
      priorfile1 = argv[argPos+1];
      priorfile2 = argv[argPos+2];
    }
    else if(strcmp(opt,"-outfile")==0) outfile = argv[argPos+1];
    else if(strcmp(opt,"-nind")==0) {
      nind1 = atoi(argv[argPos+1]);
      nind2 = atoi(argv[argPos+2]);
      nind = nind1 + nind2;
    }
    else if(strcmp(opt,"-nsites")==0) nsites = atoi(argv[argPos+1]);
    else if(strcmp(opt,"-K")==0) K = atof(argv[argPos+1]);
    else if(strcmp(opt,"-verbose")==0) verbose = atoi(argv[argPos+1]);
    else if(strcmp(opt,"-block_size")==0) block_size = atoi(argv[argPos+1]);
    else if(strcmp(opt,"-nsums")==0) nsums = atoi(argv[argPos+1]);
    else if(strcmp(opt,"-firstbase")==0) firstbase = atoi(argv[argPos+1]);
    else if(strcmp(opt,"-isfold")==0) isfold = atoi(argv[argPos+1]);
    else {
      printf("\tUnknown arguments: %s\n",opt);
      info();
      return 0; // terminate
    }
    // skip the option itself plus its value(s)
    argPos = argPos + 1 + nvalues;
  }

  /// CHECK INPUT
  // Logical operators are used throughout: bitwise '&' on an int flag such
  // as isfold gives the wrong answer for values other than 0 and 1.
  if((sfsfile1 == NULL) || (sfsfile2 == NULL) ) {
    fprintf(stderr,"\nMust supply -postfiles.\n");
    info();
    return 0;
  }
  if(outfile == NULL) {
    fprintf(stderr,"\nMust supply -outfile.\n");
    info();
    return 0;
  }
  if((priorfile1 == NULL) && (fstfile==NULL) && (K==0)) {
    // warning only, execution continues
    fprintf(stderr,"\nPerhaps you forgot to supply -priorfiles and -fstfile when using an automatic setting of lambda?\n");
  }
  if((priorfile1 != NULL) && (priorfile12!=NULL)) {
    fprintf(stderr,"\nYou should give either -priorfiles or -priorfile, otherwise I don't know if you want to use a 2D-SFS or the corrected product of marginal spectra as prior\n");
    info();
    return 0;
  }
  if((fstfile != NULL) && (priorfile12!=NULL)) {
    fprintf(stderr,"\nIf you give -fstfile I assume you want to use the correction for marginal spectra. So why are you giving -priorfile too? You should only give -priorfiles eventually if K=0.\n");
    info();
    return 0;
  }
  if((isfold!=0) && (priorfile12!=NULL)) {
    fprintf(stderr,"\nSorry. Handling the folded 2D-SFS has not been implemented yet. Please contribute or push a request. Currently -isfold 1 and -priorfile12 !NULL are not compatible.\n");
    info();
    return 0;
  }

  /// OUTPUT
  foutpost = append(outfile, "");
  fprintf(stderr,"\t->Dumping file: %s\n", foutpost);
  outpost = getFILE(foutpost, "w");

  // print input arguments // UPDATE THIS !!!
  fprintf(stderr,"\t->Using some of these args: -nind %d -nind1 %d -nind2 %d -nsites %d -postfiles %s %s -priorfiles %s %s -priorfile %s -fstfile %s -outfile %s -verbose %d -nsums %d -offset %d -K %d\n", nind, nind1, nind2, nsites, sfsfile1, sfsfile2, printableArg(priorfile1), printableArg(priorfile2), printableArg(priorfile12), printableArg(fstfile), foutpost, verbose, nsums, firstbase, K);

  // READ PRIORS (if provided)
  // marginal spectra
  array<double> prior1;
  array<double> prior2;
  if (priorfile1 != NULL) {
    if (verbose==1) fprintf(stderr, "\nAdding priors...");
    prior1 = readArray(priorfile1, nind1, isfold);
    prior2 = readArray(priorfile2, nind2, isfold);
  }
  // 2D-SFS
  matrix<double> prior12;
  if (priorfile12 != NULL) {
    if (verbose==1) fprintf(stderr, "\nAdding 2D prior...");
    prior12 = readPrior12(priorfile12, nind1*2+1, nind2*2+1);
    if (verbose==2) {
      fprintf(stderr, "\nPrior 2d:\n");
      writematrix(prior12, stderr);
    }
    //// the difference with this prior is that I don't add the prior, but I add the prior directly at computeFST step
  }

  /// GET POSITIONS OF BLOCKS
  if (block_size>(nsites-firstbase)) block_size=(nsites-firstbase);
  array<int> start; array<int> end;
  start=getStart(nsites, firstbase, block_size);
  end=getEnd(nsites, firstbase, block_size);

  /// ITERATE OVER EACH BLOCK
  int nwin = start.x;
  for (int n=0; n<nwin; n++) {

    fprintf(stderr, "Block %d out of %d from %d to %d\n", n, (nwin-1), start.data[n], end.data[n]);

    // READ POSTERIOR PROBABILITIES FILES
    matrix<double> post1;
    matrix<double> post2;
    post1 = readFileSub(sfsfile1, nind1, start.data[n], end.data[n], isfold);
    post2 = readFileSub(sfsfile2, nind2, start.data[n], end.data[n], isfold);

    if (priorfile12!=NULL) {
      // if from -realSFS 1 they are -log
      normSFS(post1, 1); // 2nd argument is islog
      normSFS(post2, 1);
    }

    if (fstfile == NULL) {
      // IF NOT FST FILE PROVIDED
      if (verbose==1) fprintf(stderr,"Computing FST and no first guess provided.\n");
      if (priorfile12==NULL) {
        if (isfold) {
          computeVarReyFold(post1, post2, verbose, outpost, nsums);
        } else {
          computeVarRey(post1, post2, verbose, outpost, nsums);
        }
      } else {
        if (verbose==1) fprintf(stderr,"Using 2D-SFS as prior. You didn't run sfstools, right??? Use only -realSFS 1.\n");
        computeVarRey12New(post1, post2, verbose, outpost, nsums, prior12);
      }
    } else {
      // IF FST FILE IS INDEED PROVIDED
      if (verbose==1) fprintf(stderr,"Computing FST and first guess provided.\n");
      array<double> firstfst;
      firstfst=readFSTsub(fstfile, nsites, start.data[n], end.data[n]);
      array <double> sublam;
      sublam = getLambdas(firstfst, prior1, prior2, K, verbose, isfold);
      if (verbose==1) fprintf(stderr,"Computed lambdas.\n");
      if (isfold) {
        computeVarRey2Fold(post1, post2, verbose, outpost, nsums, sublam);
      } else {
        computeVarRey2(post1, post2, verbose, outpost, nsums, sublam);
      }
      delete [] firstfst.data;
      delete [] sublam.data;
    }

    cleanup(post1);
    cleanup(post2);

  } // end blocks iterations

  delete [] start.data;
  delete [] end.data;

  // close the output stream explicitly so buffered results are written out
  if (outpost != NULL) fclose(outpost);

  free(foutpost);

  return 0;

} // end main