static int myGetline(char **lineptr, int *n, FILE *stream)
{
  char *line, *p;
  int size, copy, len;
  int chunkSize = 256 * sizeof(char);

   if (*lineptr == NULL || *n < 2) 
    {
      line = (char *)rax_realloc(*lineptr, chunkSize, FALSE);
      if (line == NULL)
	return -1;
      *lineptr = line;
      *n = chunkSize;
    }

   line = *lineptr;
   size = *n;
  
   copy = size;
   p = line;
   
   while(1)
     {
       while (--copy > 0)
	 {
	   register int c = getc(stream);
	   if (c == EOF)
	     goto lose;
	   else
	     {
	       *p++ = c;
	       if(c == '\n' || c == '\r')	
		 goto win;
	     }
	 }

       /* Need to enlarge the line buffer.  */
       len = p - line;
       size *= 2;
       line = rax_realloc (line, size, FALSE);
       if (line == NULL)
	 goto lose;
       *lineptr = line;
       *n = size;
       p = line + len;
       copy = size - len;
     }
   
 lose:
  if (p == *lineptr)
    return -1;
  /* Return a partial line since we got an error in the middle.  */
 win:
  *p = '\0';
  return p - *lineptr;
}
Beispiel #2
0
static void addInsertion(nodeptr p, double lh, insertions *ins)
{ 
  if(ins->count < ins->maxCount)
    {
      ins->s[ins->count].lh = lh;
      ins->s[ins->count].p = p;
      ins->count = ins->count + 1;               
    }
  else
    {        
      ins->s = rax_realloc(ins->s, sizeof(scores) * ins->maxCount * 2);     

      ins->maxCount *= 2;

      ins->s[ins->count].lh = lh;
      ins->s[ins->count].p = p;
      ins->count = ins->count + 1; 
    }
}
void parsePartitions(analdef *adef, rawdata *rdta, tree *tr)
{
  FILE *f; 
  int numberOfModels = 0; 
  int nbytes = 0;
  char *ch;
  char *cc = (char *)NULL;
  char **p_names;
  int n, i, l;
  int lower, upper, modulo;
  char buf[256];
  int **partitions;
  int pairsCount;
  int as, j;
  int k; 

  f = myfopen(modelFileName, "rb");   

 
  while(myGetline(&cc, &nbytes, f) > -1)
    {     
      if(!lineContainsOnlyWhiteChars(cc))
	{
	  numberOfModels++;
	}
      if(cc)
	rax_free(cc);
      cc = (char *)NULL;
    }     
  
  rewind(f);
      
  p_names = (char **)rax_malloc(sizeof(char *) * numberOfModels);
  partitions = (int **)rax_malloc(sizeof(int *) * numberOfModels);
      
 
  
  tr->initialPartitionData = (pInfo*)rax_malloc(sizeof(pInfo) * numberOfModels);

      
  for(i = 0; i < numberOfModels; i++) 
    {     
      tr->initialPartitionData[i].protModels = adef->proteinMatrix;
      tr->initialPartitionData[i].usePredefinedProtFreqs  = adef->protEmpiricalFreqs;
      tr->initialPartitionData[i].optimizeBaseFrequencies = FALSE;
      tr->initialPartitionData[i].dataType   = -1;
    }

  for(i = 0; i < numberOfModels; i++)    
    partitions[i] = (int *)NULL;
    
  i = 0;
  while(myGetline(&cc, &nbytes, f) > -1)
    {          
      if(!lineContainsOnlyWhiteChars(cc))
	{
	  n = strlen(cc);	 
	  p_names[i] = (char *)rax_malloc(sizeof(char) * (n + 1));
	  strcpy(&(p_names[i][0]), cc);	 
	  i++;
	}
      if(cc)
	rax_free(cc);
      cc = (char *)NULL;
    }         

  

  for(i = 0; i < numberOfModels; i++)
    {         
     
      ch = p_names[i];     
      pairsCount = 0;
      skipWhites(&ch);
      
      if(*ch == '=')
	{
	  printf("Identifier missing prior to '=' in %s\n", p_names[i]);
	  exit(-1);
	}
      
      analyzeIdentifier(&ch, i, tr);
      ch++;
            
    numberPairs:
      pairsCount++;
      partitions[i] = (int *)rax_realloc((void *)partitions[i], (1 + 3 * pairsCount) * sizeof(int), FALSE);
      partitions[i][0] = pairsCount;
      partitions[i][3 + 3 * (pairsCount - 1)] = -1; 	
      
      skipWhites(&ch);
      
      if(!isNum(*ch))
	{
	  printf("%c Number expected in %s\n", *ch, p_names[i]);
	  exit(-1);
	}   
      
      l = 0;
      while(isNum(*ch))		 
	{
	  /*printf("%c", *ch);*/
	  buf[l] = *ch;
	  ch++;	
	  l++;
	}
      buf[l] = '\0';
      lower = atoi(buf);
      partitions[i][1 + 3 * (pairsCount - 1)] = lower;   
      
      skipWhites(&ch);
      
      /* NEW */
      
      if((*ch != '-') && (*ch != ','))
	{
	  if(*ch == '\0' || *ch == '\n' || *ch == '\r')
	    {
	      upper = lower;
	      goto SINGLE_NUMBER;
	    }
	  else
	    {
	      printf("'-' or ',' expected in %s\n", p_names[i]);
	      exit(-1);
	    }
	}	 
      
      if(*ch == ',')
	{	     
	  upper = lower;
	  goto SINGLE_NUMBER;
	}
      
      /* END NEW */
      
      ch++;   
      
      skipWhites(&ch);
      
      if(!isNum(*ch))
	{
	  printf("%c Number expected in %s\n", *ch, p_names[i]);
	  exit(-1);
	}    
      
      l = 0;
      while(isNum(*ch))
	{    
	  buf[l] = *ch;
	  ch++;	
	  l++;
	}
      buf[l] = '\0';
      upper = atoi(buf);     
    SINGLE_NUMBER:
      partitions[i][2 + 3 * (pairsCount - 1)] = upper;        	  
      
      if(upper < lower)
	{
	  printf("Upper bound %d smaller than lower bound %d for this partition: %s\n", upper, lower,  p_names[i]);
	  exit(-1);
	}
      
      skipWhites(&ch);
      
      if(*ch == '\0' || *ch == '\n' || *ch == '\r') /* PC-LINEBREAK*/
	{    
	  goto parsed;
	}
      
      if(*ch == ',')
	{	 
	  ch++;
	  goto numberPairs;
	}
      
      if(*ch == '\\')
	{
	  ch++;
	  skipWhites(&ch);
	  
	  if(!isNum(*ch))
	    {
	      printf("%c Number expected in %s\n", *ch, p_names[i]);
	      exit(-1);
	    }     
	  
	   if(adef->compressPatterns == FALSE)
	    {
	      printf("\nError: You are not allowed to use interleaved partitions, that is, assign non-contiguous sites\n");
	      printf("to the same partition model, when pattern compression is disabled via the -H flag,\n");
	      printf("or when pattern compression is disabled implicitely by some other option that requires it!\n\n");
	      exit(-1);
	    }

	  l = 0;
	  while(isNum(*ch))
	    {
	      buf[l] = *ch;
	      ch++;	
	      l++;
	    }
	  buf[l] = '\0';
	  modulo = atoi(buf);      
	  partitions[i][3 + 3 * (pairsCount - 1)] = modulo; 	
	  
	  skipWhites(&ch);
	  if(*ch == '\0' || *ch == '\n' || *ch == '\r')
	    {	     
	      goto parsed;
	    }
	  if(*ch == ',')
	    {	       
	      ch++;
	      goto numberPairs;
	    }
	}  
      
      if(*ch == '/')
	{
	  printf("\nRAxML detected the character \"/\" in your partition file.\n");
	  printf("Did you mean to write something similar to this: \"DNA, p1=1-100\\3\" ?\n");
	  printf("It's actually a backslash, not a slash, the program will exit now with an error!\n\n");
	}    
      else
	{ 	 
	  printf("\nRAxML detected the character \"%c\" in your partition file,\n", *ch);
	  printf("while it does not belong there!\n");
	  printf("\nAre you sure that your partition file complies with the RAxML partition file format?\n");
	  printf("\nActually reading the manual, does indeed do help a lot\n\n");
	  printf("The program will exit now with an error!\n\n");
	}

      printf("The problematic line in your partition file is this one here:\n\n");
	
      printf("%s\n\n", p_names[i]);

      assert(0);
       
    parsed:
      ;
    }
  
  fclose(f);
 
  /*********************************************************************************************************************/ 

  for(i = 0; i <= rdta->sites; i++)
    tr->model[i] = -1;
  
  for(i = 0; i < numberOfModels; i++)
    {   
      as = partitions[i][0];     
      
      for(j = 0; j < as; j++)
	{
	  lower = partitions[i][1 + j * 3];
	  upper = partitions[i][2 + j * 3]; 
	  modulo = partitions[i][3 + j * 3];	
	 
	  if(modulo == -1)
	    {
	      for(k = lower; k <= upper; k++)
		setModel(i, k, tr->model);
	    }
	  else
	    {
	      for(k = lower; k <= upper; k += modulo)
		{
		  if(k <= rdta->sites)
		    setModel(i, k, tr->model);	      
		}
	    }
	}        
    }


  for(i = 1; i < rdta->sites + 1; i++)
    {
      
      if(tr->model[i] == -1)
	{
	  printf("ERROR: Alignment Position %d has not been assigned any model\n", i);
	  exit(-1);
	}      
    }  

  for(i = 0; i < numberOfModels; i++)
    {
      rax_free(partitions[i]);
      rax_free(p_names[i]);
    }
  
  rax_free(partitions);
  rax_free(p_names);    
    
  tr->NumberOfModels = numberOfModels;     
  
  if(adef->perGeneBranchLengths)
    {
      if(tr->NumberOfModels > NUM_BRANCHES)
	{
	  printf("You are trying to use %d partitioned models for an individual per-gene branch length estimate.\n", tr->NumberOfModels);
	  printf("Currently only %d are allowed to improve efficiency.\n", NUM_BRANCHES);
	  printf("\n");
	  printf("In order to change this please replace the line \"#define NUM_BRANCHES   %d\" in file \"axml.h\" \n", NUM_BRANCHES);
	  printf("by \"#define NUM_BRANCHES   %d\" and then re-compile RAxML.\n", tr->NumberOfModels);
	  exit(-1);
	}
      else
	{
	  tr->multiBranch = 1;
	  tr->numBranches = tr->NumberOfModels;
	}
    }
}