Exemple #1
0
void pllInitParsimonyStructures(pllInstance *tr, partitionList *pr, boolean perSiteScores)
{
  int 
    i,
    *informative = (int *)rax_malloc(sizeof(int) * (size_t)tr->originalCrunchedLength);

  for (i = 0; i < pr->numberOfPartitions; ++ i)
     rax_free (pr->partitionData[i]->parsVect);

  rax_free (tr->parsimonyScore);
 
  determineUninformativeSites(tr, pr, informative);

  compressDNA(tr, pr, informative, perSiteScores);

  for(i = tr->mxtips + 1; i <= tr->mxtips + tr->mxtips - 1; i++)
    {
      nodeptr 
        p = tr->nodep[i];

      p->xPars             = 1;
      p->next->xPars       = 0;
      p->next->next->xPars = 0;
    }

  tr->ti = (int*)rax_malloc(sizeof(int) * 4 * (size_t)tr->mxtips);  

  rax_free(informative); 
}
Exemple #2
0
/** @brief Initialize a list of best trees
    
    Initialize a list that will contain the best \a newkeep tree topologies,
    i.e. the ones that yield the best likelihood. Inside the list initialize
    space for \a newkeep + 1 topologies of \a numsp tips. The additional
    topology is the starting one

    @param bt
      Pointer to \a bestlist to be initialized

    @param newkeep
      Number of new topologies to keep

    @param numsp
      Number of species (tips)

    @return
      number of tree topology slots in the list (minus the starting one)

    @todo
      Is there a reason that this function is so complex? Many of the checks
      are unnecessary as the function is called only at two places in the
      code with newkeep=1 and newkeep=20
*/
int initBestTree (bestlist *bt, int newkeep, int numsp)
{ /* initBestTree */
  int  i;

  bt->nkeep = 0;

  if (bt->ninit <= 0) 
    {
      if (! (bt->start = setupTopol(numsp)))  return  0;
      bt->ninit    = -1;
      bt->nvalid   = 0;
      bt->numtrees = 0;
      bt->best     = PLL_UNLIKELY;
      bt->improved = PLL_FALSE;
      bt->byScore  = (topol **) rax_malloc((newkeep + 1) * sizeof(topol *));
      bt->byTopol  = (topol **) rax_malloc((newkeep + 1) * sizeof(topol *));
      if (! bt->byScore || ! bt->byTopol) {
        printf( "initBestTree: malloc failure\n");
        return 0;
      }
    }
  else if (PLL_ABS(newkeep) > bt->ninit) {
    if (newkeep <  0) newkeep = -(bt->ninit);
    else newkeep = bt->ninit;
  }

  if (newkeep < 1) {    /*  Use negative newkeep to clear list  */
    newkeep = -newkeep;
    if (newkeep < 1) newkeep = 1;
    bt->nvalid = 0;
    bt->best = PLL_UNLIKELY;
  }
  
  if (bt->nvalid >= newkeep) {
    bt->nvalid = newkeep;
    bt->worst = bt->byScore[newkeep]->likelihood;
  }
  else 
    {
      bt->worst = PLL_UNLIKELY;
    }
  
  for (i = bt->ninit + 1; i <= newkeep; i++) 
    {    
      if (! (bt->byScore[i] = setupTopol(numsp)))  break;
      bt->byTopol[i] = bt->byScore[i];
      bt->ninit = i;
    }
  
  return  (bt->nkeep = PLL_MIN(newkeep, bt->ninit));
} /* initBestTree */
Exemple #3
0
/** @brief Allocates memory for recomputation structure
 *  
 *  
 *  @todo this should not depend on tr (\a vectorRecomFraction should be a parameter)
 *    PLL_TRUE if recomputation is currently applied 
 *
 */
void allocRecompVectorsInfo(pllInstance *tr)
{
  recompVectors 
    *v = (recompVectors *) rax_malloc(sizeof(recompVectors));

  int 
    num_inner_nodes = tr->mxtips - 2,
                    num_vectors, 
                    i;

  assert(tr->vectorRecomFraction > PLL_MIN_RECOM_FRACTION);
  assert(tr->vectorRecomFraction < PLL_MAX_RECOM_FRACTION);

  num_vectors = (int) (1 + tr->vectorRecomFraction * (float)num_inner_nodes); 

  int theoretical_minimum_of_vectors = 3 + ((int)(log((double)tr->mxtips)/log(2.0)));
  //printBothOpen("Try to use %d ancestral vectors, min required %d\n", num_vectors, theoretical_minimum_of_vectors);

  assert(num_vectors >= theoretical_minimum_of_vectors);
  assert(num_vectors < tr->mxtips);


  v->numVectors = num_vectors; /* use minimum bound theoretical */

  /* init vectors tracking */

  v->iVector         = (int *) rax_malloc((size_t)num_vectors * sizeof(int));
  v->unpinnable      = (boolean *) rax_malloc((size_t)num_vectors * sizeof(boolean));

  for(i = 0; i < num_vectors; i++)
  {
    v->iVector[i]         = PLL_SLOT_UNUSED;
    v->unpinnable[i]      = PLL_FALSE;
  }

  v->iNode      = (int *) rax_malloc((size_t)num_inner_nodes * sizeof(int));
  v->stlen      = (int *) rax_malloc((size_t)num_inner_nodes * sizeof(int));

  for(i = 0; i < num_inner_nodes; i++)
  {
    v->iNode[i] = PLL_NODE_UNPINNED;
    v->stlen[i] = PLL_INNER_NODE_INIT_STLEN;
  }

  v->allSlotsBusy = PLL_FALSE;

  /* init nodes tracking */

  v->maxVectorsUsed = 0;
  tr->rvec = v;
}
Exemple #4
0
/** @brief Initializes space as large as the tree
  *
  * @param rl
  *   RELL 
  *
  * @param tr
  *   PLL instance
  *
  * @param n
  *   Number of
  *
  * @todo
  *   Don't know what is this used for. Something with RELL?
  *
  */
void initTL(topolRELL_LIST *rl, pllInstance *tr, int n)
{
  int i;

  rl->max = n; 
  rl->t = (topolRELL **)rax_malloc(sizeof(topolRELL *) * n);

  for(i = 0; i < n; i++)
    {
      rl->t[i] = (topolRELL *)rax_malloc(sizeof(topolRELL));
      rl->t[i]->connect = (connectRELL *)rax_malloc((2 * tr->mxtips - 3) * sizeof(connectRELL));
      rl->t[i]->likelihood = PLL_UNLIKELY;     
    }
}
void treeEvaluateProgressive(tree *tr)
{  
  int 
    i, k;

  tr->branchCounter = 0;
  tr->numberOfBranches = 2 * tr->mxtips - 3;
  
  tr->bInf = (branchInfo*)rax_malloc(tr->numberOfBranches * sizeof(branchInfo));  

  setupBranches(tr, tr->start->back, tr->bInf);

  assert(tr->branchCounter == tr->numberOfBranches);
  
  for(i = 0; i < tr->numBranches; i++)
    tr->partitionConverged[i] = FALSE;
  
  for(i = 0; i < 10; i++)
    {
      for(k = 0; k < tr->numberOfBranches; k++)
	{      
	  update(tr, tr->bInf[k].oP);
	  newviewGeneric(tr, tr->bInf[k].oP);     
	}
      evaluateGenericInitrav(tr, tr->start);
      printf("It %d %f \n", i, tr->likelihood);
    }
}
Exemple #6
0
int treeOptimizeThorough(tree *tr, int mintrav, int maxtrav)
{
  int i;   
  bestlist *bestT;  

  nodeRectifier(tr);

  bestT = (bestlist *) rax_malloc(sizeof(bestlist));
  bestT->ninit = 0;
  initBestTree(bestT, 1, tr->mxtips);
  
  if (maxtrav > tr->ntips - 3)  
    maxtrav = tr->ntips - 3;      
 
  tr->startLH = tr->endLH = tr->likelihood;  

  for(i = 1; i <= tr->mxtips + tr->mxtips - 2; i++)
    {     

      
      tr->bestOfNode = unlikely;     
      if(rearrangeBIG(tr, tr->nodep[i], mintrav, maxtrav))
	{          
	 
	  if((tr->endLH > tr->startLH) && (tr->bestOfNode != unlikely))
	    {			    
	      restoreTreeFast(tr);	     
	      quickSmoothLocal(tr, 3);
	      tr->startLH = tr->endLH = tr->likelihood;	 		     
	    }	 
	  else
	    {		 
	      if(tr->bestOfNode != unlikely)
		{		     
		  resetBestTree(bestT);		  		  		  
		  saveBestTree(bestT, tr);
		  restoreTreeFast(tr);		  
		  quickSmoothLocal(tr, 3);		  		    
		  if(tr->likelihood < tr->startLH)		    		    
		    {
		      int res;
		      res = recallBestTree(bestT, 1, tr);		      		    
		      assert(res > 0);
		    }
		  else				    
		    tr->startLH = tr->endLH = tr->likelihood;		  
		}
	    }
	}

    	
    }    

  freeBestTree(bestT);
  rax_free(bestT);

  return 1;     
}
void *rax_calloc(size_t n, size_t size) 
{
  void 
    *ptr = rax_malloc(size * n);  

   memset(ptr, 0, size * n);

   return ptr;
}
Exemple #8
0
void allocateMultifurcations(tree *tr, tree *smallTree)
{ 
  int
    i,
    tips,
    inter; 

  smallTree->numBranches = tr->numBranches;

  smallTree->mxtips = tr->mxtips;

  //printf("Small tree tiups: %d\n", smallTree->mxtips);

  smallTree->nameHash = tr->nameHash;

  smallTree->nameList = tr->nameList;

  tips  = tr->mxtips;
  inter = tr->mxtips - 1;
  
  smallTree->nodep = (nodeptr *)rax_malloc((tips + 3 * inter) * sizeof(nodeptr));

  smallTree->maxNodes = tips + 3 * inter;

  smallTree->nodep[0] = (node *) NULL;

  for (i = 1; i <= tips; i++)
    {
      smallTree->nodep[i] = (nodeptr)rax_malloc(sizeof(node));      
      memcpy(smallTree->nodep[i], tr->nodep[i], sizeof(node));
      smallTree->nodep[i]->back = (node *) NULL;
      smallTree->nodep[i]->next = (node *) NULL;
    }

  for(i = tips + 1; i < tips + 3 * inter; i++)
    {     
      smallTree->nodep[i] = (nodeptr)rax_malloc(sizeof(node));
      smallTree->nodep[i]->number = i;     
      smallTree->nodep[i]->back = (node *) NULL;
      smallTree->nodep[i]->next = (node *) NULL;
    }
  
}
Exemple #9
0
void allocTraversalCounter(pllInstance *tr)
{
  traversalCounter 
    *tc;

  int 
    k;

  tc = (traversalCounter *)rax_malloc(sizeof(traversalCounter));

  tc->travlenFreq = (unsigned int *)rax_malloc(tr->mxtips * sizeof(int));

  for(k = 0; k < tr->mxtips; k++)
    tc->travlenFreq[k] = 0;

  tc->tt = 0;
  tc->ti = 0;
  tc->ii = 0;
  tc->numTraversals = 0;
  tr->travCounter = tc;
}
Exemple #10
0
void *rax_calloc(size_t n, size_t size) 
{
   void 
     *ptr = rax_malloc(size * n);

   if (ptr == (void*)NULL) 
     return (void*)NULL;

   memset(ptr, 0, size * n);

   return ptr;
}
static double evaluatePartialGTRCATSECONDARY(int i, double ki, int counter,  traversalInfo *ti, double qz,
					     int w, double *EIGN, double *EI, double *EV,
					     double *tipVector, unsigned char **yVector, 
					     int branchReference, int mxtips)
{
  double lz, term;       
  double  d[16];
  double   *x1, *x2; 
  int scale = 0, k, l;
  double *lVector = (double *)rax_malloc(sizeof(double) * 16 * mxtips);
 
  traversalInfo *trav = &ti[0];

  assert(isTip(trav->pNumber, mxtips));
     
  x1 = &(tipVector[16 *  yVector[trav->pNumber][i]]);   

  for(k = 1; k < counter; k++)                
    computeVectorGTRCATSECONDARY(lVector, &scale, ki, i, ti[k].qz[branchReference], ti[k].rz[branchReference], 
				 &ti[k], EIGN, EI, EV, 
				 tipVector, yVector, mxtips);       
   
  x2 = &lVector[16 * (trav->qNumber - mxtips)];

      

  assert(0 <=  (trav->qNumber - mxtips) && (trav->qNumber - mxtips) < mxtips);  
  
  if(qz < zmin) 
    lz = zmin;
  lz  = log(qz); 
  lz *= ki;
  
  d[0] = 1.0;
  for(l = 1; l < 16; l++)
    d[l] = EXP (EIGN[l-1] * lz);

  term = 0.0;
  
  for(l = 0; l < 16; l++)
    term += x1[l] * x2[l] * d[l];   

  term = LOG(FABS(term)) + (scale * LOG(minlikelihood));   

  term = term * w;

  rax_free(lVector);
  

  return  term;
}
Exemple #12
0
/** @brief Allocate and initialize space for a tree topology
    
    Allocate and initialize a \a topol structure for a tree topology of
    \a maxtips tips

    @param
      Number of tips of topology

    @return
      Pointer to the allocated \a topol structure
*/
static topol  *setupTopol (int maxtips)
{
  topol   *tpl;

  if (! (tpl = (topol *) rax_malloc(sizeof(topol))) || 
      ! (tpl->links = (connptr) rax_malloc((2*maxtips-3) * sizeof(connect))))
    {
      printf("ERROR: Unable to get topology memory");
      tpl = (topol *) NULL;
    }
  else 
    {
      tpl->likelihood  = PLL_UNLIKELY;
      tpl->start       = (node *) NULL;
      tpl->nextlink    = 0;
      tpl->ntips       = 0;
      tpl->nextnode    = 0;    
      tpl->scrNum      = 0;     /* position in sorted list of scores */
      tpl->tplNum      = 0;     /* position in sorted list of trees */	      
    }
  
  return  tpl;
} 
void initInfoList(int n)
{
  int i;

  iList.n = n;
  iList.valid = 0;
  iList.list = (bestInfo *)rax_malloc(sizeof(bestInfo) * n);

  for(i = 0; i < n; i++)
    {
      iList.list[i].node = (nodeptr)NULL;
      iList.list[i].likelihood = unlikely;
    }
}
Exemple #14
0
/** @ingroup alignmentGroup
    @brief Parse a PHYLIP file

    Parses the PHYLIP file \a filename and returns a ::pllAlignmentData structure
    with the alignment.

    @param filename
      Name of file to be parsed

    @return
      Returns a structure of type ::pllAlignmentData that contains the alignment, or \b NULL
      in case of failure.
*/
static pllAlignmentData *
pllParsePHYLIP (const char * filename)
{
    int i, input, sequenceCount, sequenceLength;
    char * rawdata;
    long filesize;
    pllAlignmentData * alignmentData;

    rawdata = pllReadFile (filename, &filesize);
    if (!rawdata)
    {
        errno = PLL_ERROR_FILE_OPEN;
        return (NULL);
    }

    init_lexan (rawdata, filesize);
    input = get_next_symbol();

    /* parse the header to obtain the number of taxa and sequence length */
    if (!read_phylip_header (&input, &sequenceCount, &sequenceLength))
    {
        rax_free (rawdata);
        fprintf (stderr, "Error while parsing PHYLIP header (number of taxa and sequence length)\n");
        errno = PLL_ERROR_PHYLIP_HEADER_SYNTAX;
        return (NULL);
    }

    lex_table_amend_phylip();

    /* allocate alignment structure */
    alignmentData = pllInitAlignmentData (sequenceCount, sequenceLength);

    if (! parse_phylip (alignmentData, input))
    {
        errno = PLL_ERROR_PHYLIP_BODY_SYNTAX;
        pllAlignmentDataDestroy (alignmentData);
        lex_table_restore();
        rax_free (rawdata);
        return (NULL);
    }

    lex_table_restore();
    rax_free (rawdata);

    alignmentData->siteWeights  = (int *) rax_malloc (alignmentData->sequenceLength * sizeof (int));
    for (i = 0; i < alignmentData->sequenceLength; ++ i)
        alignmentData->siteWeights[i] = 1;

    return (alignmentData);
}
Exemple #15
0
void addword(char *s, stringHashtable *h, int nodeNumber)
{
  hashNumberType position = hashString(s, h->tableSize);
  stringEntry *p = h->table[position];
  
  for(; p!= NULL; p = p->next)
    {
      if(strcmp(s, p->word) == 0)		 
	return;	  	
    }

  p = (stringEntry *)rax_malloc(sizeof(stringEntry));

  assert(p);
  
  p->nodeNumber = nodeNumber;
  p->word = (char *)rax_malloc((strlen(s) + 1) * sizeof(char));

  strcpy(p->word, s);
  
  p->next =  h->table[position];
  
  h->table[position] = p;
}
static pllBipartitionEntry *initEntry(void)
{
    pllBipartitionEntry * e = (pllBipartitionEntry *)rax_malloc(sizeof(pllBipartitionEntry));

    e->bitVector     = (unsigned int*)NULL;
    e->treeVector    = (unsigned int*)NULL;
    e->supportVector = (int*)NULL;
    e->bipNumber  = 0;
    e->bipNumber2 = 0;
    e->supportFromTreeset[0] = 0;
    e->supportFromTreeset[1] = 0;
    e->next       = (pllBipartitionEntry *)NULL;

    return e;
}
Exemple #17
0
void nodeRectifier(pllInstance *tr)
{
  nodeptr *np = (nodeptr *)rax_malloc(2 * tr->mxtips * sizeof(nodeptr));
  int i;
  int count = 0;
  
  tr->start       = tr->nodep[1];
  tr->rooted      = PLL_FALSE;

  /* TODO why is tr->rooted set to PLL_FALSE here ?*/
  
  for(i = tr->mxtips + 1; i <= (tr->mxtips + tr->mxtips - 1); i++)
    np[i] = tr->nodep[i];           
  
  reorderNodes(tr, np, tr->start->back, &count); 

 
  rax_free(np);
}
Exemple #18
0
stringHashtable *initStringHashTable(hashNumberType n)
{
  /* 
     init with primes 
  */
    
  static const hashNumberType initTable[] = {53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317,
					     196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843,
					     50331653, 100663319, 201326611, 402653189, 805306457, 1610612741};
 

  /* init with powers of two

  static const  hashNumberType initTable[] = {64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384,
					      32768, 65536, 131072, 262144, 524288, 1048576, 2097152,
					      4194304, 8388608, 16777216, 33554432, 67108864, 134217728,
					      268435456, 536870912, 1073741824, 2147483648U};
  */
  
  stringHashtable *h = (stringHashtable*)rax_malloc(sizeof(stringHashtable));
  
  hashNumberType
    tableSize,
    i,
    primeTableLength = sizeof(initTable)/sizeof(initTable[0]),
    maxSize = (hashNumberType)-1;    

  assert(n <= maxSize);

  i = 0;

  while(initTable[i] < n && i < primeTableLength)
    i++;

  assert(i < primeTableLength);

  tableSize = initTable[i];  

  h->table = (stringEntry**)rax_calloc(tableSize, sizeof(stringEntry*));
  h->tableSize = tableSize;    

  return h;
}
Exemple #19
0
static pInfo *allocParams(tree *tr)
{
  int i;
  pInfo *partBuffer = (pInfo*)rax_malloc(sizeof(pInfo) * tr->NumberOfModels);

  for(i = 0; i < tr->NumberOfModels; i++)
    {
      const partitionLengths *pl = getPartitionLengths(&(tr->partitionData[i]));

      partBuffer[i].EIGN = (double*)rax_malloc(pl->eignLength * sizeof(double));
      partBuffer[i].EV   = (double*)rax_malloc(pl->evLength * sizeof(double));
      partBuffer[i].EI   = (double*)rax_malloc(pl->eiLength * sizeof(double));	  
      partBuffer[i].substRates = (double *)rax_malloc(pl->substRatesLength * sizeof(double));	  
      partBuffer[i].frequencies =  (double*)rax_malloc(pl->frequenciesLength * sizeof(double));	  
      partBuffer[i].tipVector   = (double *)rax_malloc(pl->tipVectorLength * sizeof(double));
      
     
    }

  return partBuffer;      
}
Exemple #20
0
static int
parse_newick (pllStack ** stack, int * inp)
{
  pllNewickNodeInfo * item = NULL;
  int item_active = 0;
  pllLexToken token;
  int input;
  pllLexToken prev_token;
  int nop = 0;          /* number of open parentheses */
  int depth = 0;

  prev_token.tokenType = PLL_TOKEN_UNKNOWN;

  input = *inp;

  NEXT_TOKEN
  
  while (token.tokenType != PLL_TOKEN_EOF && token.tokenType != PLL_TOKEN_UNKNOWN)
  {
    switch (token.tokenType)
     {
       case PLL_TOKEN_OPAREN:
#ifdef PLLDEBUG
       printf ("PLL_TOKEN_OPAREN\n");
#endif
        ++nop;
        memcpy (&prev_token, &token, sizeof (pllLexToken));
        ++depth;
        break;

       case PLL_TOKEN_CPAREN:
#ifdef PLLDEBUG
       printf ("PLL_TOKEN_CPAREN\n");
#endif
        if (prev_token.tokenType != PLL_TOKEN_CPAREN  &&
            prev_token.tokenType != PLL_TOKEN_UNKNOWN &&
            prev_token.tokenType != PLL_TOKEN_STRING  &&
            prev_token.tokenType != PLL_TOKEN_NUMBER  &&
            prev_token.tokenType != PLL_TOKEN_FLOAT) return (0);

        if (!nop) return (0);
        --nop;
        memcpy (&prev_token, &token, sizeof (pllLexToken));

        /* push to the stack */
        if (!item) item = (pllNewickNodeInfo *) rax_calloc (1, sizeof (pllNewickNodeInfo)); // possibly not nec
        //if (item->name   == NULL) item->name   = strdup ("INTERNAL_NODE");
        if (item->name == NULL) 
         {
           item->name = (char *) rax_malloc ((strlen("INTERNAL_NODE") + 1) * sizeof (char));
           strcpy (item->name, "INTERNAL_NODE");
         }

        //if (item->branch == NULL) item->branch = strdup ("0.000000"); 
        if (item->branch == NULL) 
         {
           item->branch = (char *) rax_malloc ((strlen("0.000000") + 1) * sizeof (char));
           strcpy (item->branch, "0.000000");
         }
        item->depth = depth;
        pllStackPush (stack, item);
        item_active  = 1;       /* active = 1 */
        item = NULL;
        --depth;
        break;

       case PLL_TOKEN_STRING:
#ifdef PLLDEBUG
       printf ("PLL_TOKEN_STRING      %.*s\n", token.len, token.lexeme);
#endif
        if (prev_token.tokenType != PLL_TOKEN_OPAREN &&
            prev_token.tokenType != PLL_TOKEN_CPAREN &&
            prev_token.tokenType != PLL_TOKEN_UNKNOWN &&
            prev_token.tokenType != PLL_TOKEN_COMMA) return (0);
        if (!item) item = (pllNewickNodeInfo *) rax_calloc (1, sizeof (pllNewickNodeInfo));
        //item->name = strndup (token.lexeme, token.len);
        item->name = (char *) rax_malloc ((token.len + 1) * sizeof (char));
        strncpy (item->name, token.lexeme, token.len);
        item->name[token.len] = 0;

        item_active = 1;
        item->depth = depth;
        if (prev_token.tokenType == PLL_TOKEN_COMMA  ||
            prev_token.tokenType == PLL_TOKEN_OPAREN ||
            prev_token.tokenType == PLL_TOKEN_UNKNOWN) item->leaf = 1;
        memcpy (&prev_token, &token, sizeof (pllLexToken));
        break;

       case PLL_TOKEN_FLOAT:
       case PLL_TOKEN_NUMBER:
#ifdef PLLDEBUG
       if (token.tokenType == PLL_TOKEN_FLOAT) printf ("PLL_TOKEN_FLOAT\n"); else printf ("PLL_TOKEN_NUMBER\n");
#endif
         if  (prev_token.tokenType != PLL_TOKEN_OPAREN &&
              prev_token.tokenType != PLL_TOKEN_CPAREN &&
              prev_token.tokenType != PLL_TOKEN_COLON  &&
              prev_token.tokenType != PLL_TOKEN_UNKNOWN &&
              prev_token.tokenType != PLL_TOKEN_COMMA) return (0);
        if (!item) item = (pllNewickNodeInfo *) rax_calloc (1, sizeof (pllNewickNodeInfo));
        if (prev_token.tokenType == PLL_TOKEN_COLON)
         {
           //item->branch = strndup (token.lexeme, token.len);
           item->branch = (char *) rax_malloc ((token.len + 1) * sizeof (char));
           strncpy (item->branch, token.lexeme, token.len);
           item->branch[token.len] = 0;
         }
        else
         {
           if (prev_token.tokenType == PLL_TOKEN_COMMA  ||
               prev_token.tokenType == PLL_TOKEN_OPAREN ||
               prev_token.tokenType == PLL_TOKEN_UNKNOWN) item->leaf = 1;
           //if (prev_token.tokenType != PLL_TOKEN_UNKNOWN) ++ indent;
           //item->name = strndup (token.lexeme, token.len);
           item->name = (char *) rax_malloc ((token.len + 1) * sizeof (char));
           strncpy (item->name, token.lexeme, token.len);
           item->name[token.len] = 0;
         }
        item_active = 1;
        item->depth = depth;
        memcpy (&prev_token, &token, sizeof (pllLexToken));
        break;

       case PLL_TOKEN_COLON:
#ifdef PLLDEBUG
       printf ("PLL_TOKEN_COLON\n");
#endif
        if (prev_token.tokenType != PLL_TOKEN_CPAREN &&
            prev_token.tokenType != PLL_TOKEN_STRING &&
            prev_token.tokenType != PLL_TOKEN_FLOAT  &&
            prev_token.tokenType != PLL_TOKEN_NUMBER) return (0);
        memcpy (&prev_token, &token, sizeof (pllLexToken));
        break;

       case PLL_TOKEN_COMMA:
#ifdef PLLDEBUG
       printf ("PLL_TOKEN_COMMA\n");
#endif
        if (prev_token.tokenType != PLL_TOKEN_CPAREN &&
             prev_token.tokenType != PLL_TOKEN_STRING &&
             prev_token.tokenType != PLL_TOKEN_FLOAT && 
             prev_token.tokenType != PLL_TOKEN_NUMBER) return (0);
        memcpy (&prev_token, &token, sizeof (pllLexToken));
        
        /* push to the stack */
        if (!item) item = (pllNewickNodeInfo *) rax_calloc (1, sizeof (pllNewickNodeInfo)); // possibly not nece
        //if (item->name   == NULL) item->name   = strdup ("INTERNAL_NODE");
        if (item->name == NULL) 
         {
           item->name = (char *) rax_malloc ((strlen("INTERNAL_NODE") + 1) * sizeof (char));
           strcpy (item->name, "INTERNAL_NODE");
         }
        //if (item->branch == NULL) item->branch = strdup ("0.000000"); 
        if (item->branch == NULL) 
         {
           item->branch = (char *) rax_malloc ((strlen("0.000000") + 1) * sizeof (char));
           strcpy (item->branch, "0.000000");
         }
        item->depth = depth;
        pllStackPush (stack, item);
        item_active  = 0;
        item = NULL;
        break;

       case PLL_TOKEN_SEMICOLON:
#ifdef PLLDEBUG
        printf ("PLL_TOKEN_SEMICOLON\n");
#endif
        /* push to the stack */
        if (!item) item = (pllNewickNodeInfo *) rax_calloc (1, sizeof (pllNewickNodeInfo));
        //if (item->name   == NULL) item->name   = strdup ("ROOT_NODE");
        if (item->name == NULL) 
         {
           item->name = (char *) rax_malloc ((strlen("ROOT_NODE") + 1) * sizeof (char));
           strcpy (item->name, "ROOT_NODE");
         }
        //if (item->branch == NULL) item->branch = strdup ("0.000000"); 
        if (item->branch == NULL) 
         {
           item->branch = (char *) rax_malloc ((strlen("0.000000") + 1) * sizeof (char));
           strcpy (item->branch, "0.000000");
         }
        pllStackPush (stack, item);
        item_active  = 0;
        item = NULL;
        break;
       default:
#ifdef __DEBUGGING_MODE
         printf ("Unknown token: %d\n", token.tokenType);
#endif
       // TODO: Finish this part and add error codes
        break;
     }
    NEXT_TOKEN
    CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE);
  }
  if (item_active)
   {
     if (!item) item = (pllNewickNodeInfo *) rax_calloc (1, sizeof (pllNewickNodeInfo));
     //if (item->name   == NULL) item->name   = strdup ("ROOT_NODE");
     if (item->name == NULL) 
      {
        item->name = (char *) rax_malloc ((strlen("ROOT_NODE") + 1) * sizeof (char));
        strcpy (item->name, "ROOT_NODE");
      }
     //if (item->branch == NULL) item->branch = strdup ("0.000000"); 
     if (item->branch == NULL) 
      {
        item->branch = (char *) rax_malloc ((strlen("0.000000") + 1) * sizeof (char));
        strcpy (item->branch, "0.000000");
      }
     pllStackPush (stack, item);
     item_active  = 0;
   }

  if (nop || token.tokenType == PLL_TOKEN_UNKNOWN) 
   {
     return (0);
   }

  return (1);
}
void handleExcludeFile(tree *tr, analdef *adef, rawdata *rdta)
{
  FILE *f;  
  char buf[256];
  int
    ch,
    j, value, i,
    state = 0,
    numberOfModels = 0,
    l = -1,
    excludeRegion   = 0,
    excludedColumns = 0,
    modelCounter    = 1;
  int
    *excludeArray, *countArray, *modelList;
  int
    **partitions;

  printf("\n\n");

  f = myfopen(excludeFileName, "rb");    

  while((ch = getc(f)) != EOF)
    {
      if(ch == '-')
	numberOfModels++;
    } 

  excludeArray = (int*)rax_malloc(sizeof(int) * (rdta->sites + 1));
  countArray   = (int*)rax_malloc(sizeof(int) * (rdta->sites + 1));
  modelList    = (int *)rax_malloc((rdta->sites + 1)* sizeof(int));

  partitions = (int **)rax_malloc(sizeof(int *) * numberOfModels);  
  for(i = 0; i < numberOfModels; i++)
    partitions[i] = (int *)rax_malloc(sizeof(int) * 2);

  rewind(f);
  
  while((ch = getc(f)) != EOF)
    {     
      switch(state)
	{
	case 0: /* get first number */
	  if(!whitechar(ch))
	    {
	      if(!isNum(ch))
		{
		  printf("exclude file must have format: number-number [number-number]*\n");
		  exit(-1);
		}
	      l = 0;
	      buf[l++] = ch;
	      state = 1;
	    }
	  break;
	case 1: /*get the number or detect - */
	  if(!isNum(ch) && ch != '-')
	    {
	      printf("exclude file must have format: number-number [number-number]*\n");
	      exit(-1);
	    }
	  if(isNum(ch))
	    {
	      buf[l++] = ch;
	    }
	  else
	    {
	      buf[l++] = '\0';	     
	      value = atoi(buf);
	      partitions[excludeRegion][0] = value;
	      state = 2;
	    }
	  break;
	case 2: /*get second number */
	  if(!isNum(ch))
	    {
	      printf("exclude file must have format: number-number [number-number]*\n");
	      exit(-1);
	    }
	  l = 0;
	  buf[l++] = ch;
	  state = 3;
	  break;
	case 3: /* continue second number or find end */	 
	  if(!isNum(ch) && !whitechar(ch))
	    {
	      printf("exclude file must have format: number-number [number-number]*\n");
	      exit(-1);
	    }
	  if(isNum(ch))
	    {
	      buf[l++] = ch;
	    }
	  else
	    {	      
	      buf[l++] = '\0';	     
	      value = atoi(buf);
	      partitions[excludeRegion][1] = value;
	      excludeRegion++;
	      state = 0;
	    }
	  break;
	default:
	  assert(0);
	}
    }
     
  if(state == 3)
    {
      buf[l++] = '\0';     
      value = atoi(buf);
      partitions[excludeRegion][1] = value;
      excludeRegion++;
    }
  
  assert(excludeRegion == numberOfModels);

  for(i = 0; i <= rdta->sites; i++)
    {
      excludeArray[i] = -1;
      countArray[i] = 0;      
      modelList[i] = -1;
    }  

  for(i = 0; i < numberOfModels; i++)
    {
      int lower = partitions[i][0];
      int upper = partitions[i][1];

      if(lower > upper)
	{
	  printf("Misspecified exclude region %d\n", i);
	  printf("lower bound %d is greater than upper bound %d\n", lower, upper);
	  exit(-1);
	}

      if(lower == 0)
	{
	  printf("Misspecified exclude region %d\n", i);
	  printf("lower bound must be greater than 0\n");
	  exit(-1);
	}

      if(upper > rdta->sites)
	{
	  printf("Misspecified exclude region %d\n", i);
	  printf("upper bound %d must be smaller than %d\n", upper, (rdta->sites + 1));
	  exit(-1);
	}	
      for(j = lower; j <= upper; j++)
	{
	  if(excludeArray[j] != -1)
	    {
	      printf("WARNING: Exclude regions %d and %d overlap at position %d (already excluded %d times)\n", 
		     excludeArray[j], i, j, countArray[j]);
	    }
	  excludeArray[j] = i;
	  countArray[j]   =  countArray[j] + 1;	 
	}
    }

  for(i = 1; i <= rdta->sites; i++)
    {
      if(excludeArray[i] != -1)
	excludedColumns++;
      else
	{
	  modelList[modelCounter] = tr->model[i];
	  modelCounter++;
	}
    }

  printf("You have excluded %d out of %d columns\n", excludedColumns, rdta->sites);

  if(excludedColumns == rdta->sites)
    {
      printf("Error: You have excluded all sites\n");
      exit(-1);
    }

  if(adef->useSecondaryStructure && (excludedColumns > 0))
    {
      char mfn[2048];
      int countColumns;
      FILE *newFile;

      assert(adef->useMultipleModel);

      strcpy(mfn, secondaryStructureFileName);
      strcat(mfn, ".");
      strcat(mfn, excludeFileName);

      newFile = myfopen(mfn, "wb");

      printBothOpen("\nA secondary structure file with analogous structure assignments for non-excluded columns is printed to file %s\n", mfn);        	     	    
		  
      for(i = 1, countColumns = 0; i <= rdta->sites; i++)
	{		  
	  if(excludeArray[i] == -1)
	    fprintf(newFile, "%c", tr->secondaryStructureInput[i - 1]);
	  else
	    countColumns++;
	}
		  
      assert(countColumns == excludedColumns);
		  
      fprintf(newFile,"\n");
		  
      fclose(newFile);
    }


  if(adef->useMultipleModel && (excludedColumns > 0))
    {      
      char mfn[2048];
      FILE *newFile;

      strcpy(mfn, modelFileName);
      strcat(mfn, ".");
      strcat(mfn, excludeFileName);

      newFile = myfopen(mfn, "wb");

      printf("\nA partition file with analogous model assignments for non-excluded columns is printed to file %s\n", mfn);     
	      
      for(i = 0; i < tr->NumberOfModels; i++)
	{
	  boolean modelStillExists = FALSE;
		  
	  for(j = 1; (j <= rdta->sites) && (!modelStillExists); j++)
	    {
	      if(modelList[j] == i)
		modelStillExists = TRUE;
	    }

	  if(modelStillExists)
	    {	  	      
	      int k = 1;
	      int lower, upper;
	      int parts = 0;

	      switch(tr->partitionData[i].dataType)
		{
		case AA_DATA:		      		     
		  {
		    char AAmodel[1024];
		    
		    if(tr->partitionData[i].ascBias)
		      {
			strcpy(AAmodel, "ASC_");
			strcat(AAmodel, protModels[tr->partitionData[i].protModels]);
		      }
		    else
		      strcpy(AAmodel, protModels[tr->partitionData[i].protModels]);
		    if(tr->partitionData[i].usePredefinedProtFreqs == FALSE)
		      strcat(AAmodel, "F");
		    if(tr->partitionData[i].optimizeBaseFrequencies)
		      strcat(AAmodel, "X");

		    assert(!(tr->partitionData[i].optimizeBaseFrequencies && tr->partitionData[i].usePredefinedProtFreqs));
		    
		    fprintf(newFile, "%s, ", AAmodel);
		  }
		  break;
		case DNA_DATA:
		  if(tr->partitionData[i].optimizeBaseFrequencies)
		    {
		      if(tr->partitionData[i].ascBias)
			fprintf(newFile, "ASC_DNAX, ");
		      else
			fprintf(newFile, "DNAX, ");
		    }
		  else
		    {
		      if(tr->partitionData[i].ascBias)
			fprintf(newFile, "ASC_DNA, ");
		      else
			fprintf(newFile, "DNA, ");
		    }
		  break;
		case BINARY_DATA:
		  if(tr->partitionData[i].optimizeBaseFrequencies)
		    {
		      if(tr->partitionData[i].ascBias)
			fprintf(newFile, "ASC_BINX, ");
		      else
			fprintf(newFile, "BINX, ");
		    }
		  else
		    {
		      if(tr->partitionData[i].ascBias)
			fprintf(newFile, "ASC_BIN, ");
		      else
			fprintf(newFile, "BIN, ");
		    }
		  break;
		case GENERIC_32:
		  if(tr->partitionData[i].optimizeBaseFrequencies)
		    {
		      if(tr->partitionData[i].ascBias)
			fprintf(newFile, "ASC_MULTIX, ");
		      else
			fprintf(newFile, "MULTIX, ");
		    }
		  else
		    {
		      if(tr->partitionData[i].ascBias)
			fprintf(newFile, "ASC_MULTI, ");
		      else
			fprintf(newFile, "MULTI, ");
		    }
		  break;
		case GENERIC_64:
		  if(tr->partitionData[i].optimizeBaseFrequencies)
		    {
		      if(tr->partitionData[i].ascBias)
			fprintf(newFile, "ASC_CODONX, ");
		      else
			fprintf(newFile, "CODONX, ");
		    }
		  else
		    {
		      if(tr->partitionData[i].ascBias)
			fprintf(newFile, "ASC_CODON, ");
		      else
			fprintf(newFile, "CODON, ");
		    }
		  break;
		default:
		  assert(0);
		}

	      fprintf(newFile, "%s = ", tr->partitionData[i].partitionName);
	      
	      while(k <= rdta->sites)
		{
		  if(modelList[k] == i)
		    {
		      lower = k;
		      while((modelList[k + 1] == i) && (k <= rdta->sites))		      			
			k++;
		      upper = k;
		      
		      if(lower == upper)		  
			{
			  if(parts == 0)
			    fprintf(newFile, "%d", lower);
			  else
			    fprintf(newFile, ",%d", lower);
			}
		      else
			{
			  if(parts == 0)
			    fprintf(newFile, "%d-%d", lower, upper);
			  else
			    fprintf(newFile, ",%d-%d", lower, upper);
			}		  
		      parts++;
		    }
		  k++;
		}
	      fprintf(newFile, "\n");
	    }		  
	}	
      fclose(newFile);
    }

  
  {
    FILE *newFile;
    char mfn[2048];
   

    strcpy(mfn, seq_file);
    strcat(mfn, ".");
    strcat(mfn, excludeFileName);
    
    newFile = myfopen(mfn, "wb");
    
    printf("\nAn alignment file with excluded columns is printed to file %s\n\n\n", mfn);
    
    fprintf(newFile, "%d %d\n", tr->mxtips, rdta->sites - excludedColumns);
    
    for(i = 1; i <= tr->mxtips; i++)
      {   
	unsigned char *tipI =  &(rdta->y[i][1]);
	fprintf(newFile, "%s ", tr->nameList[i]);
	
	for(j = 0; j < rdta->sites; j++)
	  {
	    if(excludeArray[j + 1] == -1)	      
	      fprintf(newFile, "%c", getInverseMeaning(tr->dataVector[j + 1], tipI[j]));	       	  
	  }
	
	fprintf(newFile, "\n");
      }
    
    fclose(newFile);
  }

  
  fclose(f);
  for(i = 0; i < numberOfModels; i++)
    rax_free(partitions[i]);
  rax_free(partitions);  
  rax_free(excludeArray);
  rax_free(countArray);
  rax_free(modelList);
}
static void analyzeIdentifier(char **ch, int modelNumber, tree *tr)
{
  char 
    *start = *ch,
    ident[2048] = "",
    model[2048] = "",
    thisModel[2048] = "";
  
  int   
    i = 0, 
    n, 
    j,
    containsComma = 0;

  while(**ch != '=')
    {
      if(**ch == '\n' || **ch == '\r')
	{
	  printf("\nPartition file parsing error!\n");
	  printf("Each line must contain a \"=\" character\n");
	  printf("Offending line: %s\n", start);
	  printf("RAxML will exit now.\n\n");
	  errorExit(-1);
	}

      if(**ch != ' ' && **ch != '\t')
	{
	  ident[i] = **ch;      
	  i++;
	}      
      *ch = *ch + 1;
    }
  
  ident[i] = '\0';

  n = i;
  i = 0;
  
  

  for(i = 0; i < n; i++)
    if(ident[i] == ',') 
      containsComma = 1;

  if(!containsComma)
    {
      printf("Error, model file must have format: Substitution model, then a comma, and then the partition name\n");
      exit(-1);
    }
  else
    {
      boolean 
	analyzeRest = TRUE,
	useExternalFile = FALSE,
	found = FALSE;
      
      int 
	openBracket = 0,
	closeBracket = 0,
	openPos = 0,
	closePos = 0;
            
      i = 0;
      
      while(ident[i] != ',')
	{	 
	  if(ident[i] == '[')
	    {
	      openPos = i;
	      openBracket++;
	    }
	  if(ident[i] == ']')
	    {
	      closePos = i;
	      closeBracket++;
	    }
	  model[i] = ident[i];
	  i++;
	}     

      if(closeBracket > 0 || openBracket > 0)
	{
	  if((closeBracket == 1) && (openBracket == 1) && (openPos < closePos))
	    useExternalFile = TRUE;
	  else
	    {
	      printf("\nError: Apparently you want to specify a user-defined protein substitution model\n");
	      printf("or ascertainment bias correction model that shall be read from file\n");
	      printf("It must be enclosed in opening and closing bracktes like this: [prot=fileName] or [asc=fileName]\n\n");
	      printf("you specified: %s\n\n", model);
	      exit(-1);
	    }
	}
      
      if(useExternalFile)
	{
	  char
	    designator[2048] = "",
	    fileName[2048] = "";

	  int 
	    pos,	   
	    index,
	    lower = 0,
	    upper = i - 1;	       
	  
	  boolean
	    isProteinFile   = TRUE;

	  while(model[lower] == '[')
	    lower++;

	  while(model[upper] == ']')
	    upper--;
	  
	  assert(lower < upper);

	  index = lower;
	  pos = 0;	  
	  
	  while(model[index] != '~')
	    {
	      designator[pos] = model[index];
	      pos++;
	      index++;
	    }

	  designator[pos] = '\0';
	  
	  if(strcmp(designator, "asc") == 0)	    
	    isProteinFile = FALSE;	    
	  else
	    {
	      if(strcmp(designator, "prot") == 0)
		isProteinFile = TRUE;
	      else
		{
		  printf("Error external partition file type %s does not exist\n", designator);
		  printf("Available file types: asc and prot\n");
		  exit(-1);
		}	       
	    }	    	 
	  
	  while(model[index] == '~')	    	     
	    index++;	   

	  pos = 0;
	  
	  while(model[index] != ']')
	    {
	      fileName[pos] = model[index];
	      index++;
	      pos++;
	    }	    	  
	  
	  fileName[pos] = '\0';	 

	  if(!filexists(fileName))
	    {
	      printf("\n\ncustom protein substitution or ascertainment bias file [%s] you want to use does not exist!\n", fileName);
	      printf("you need to specify the full path\n");
	      printf("the file name shall not contain blanks!\n\n");
	      exit(-1);
	    }
	  
	  
	  if(isProteinFile)
	    {
	      strcpy(tr->initialPartitionData[modelNumber].proteinSubstitutionFileName, fileName);
	      /*printf("%s \n", tr->initialPartitionData[modelNumber].proteinSubstitutionFileName);*/
	      
	      tr->initialPartitionData[modelNumber].protModels = PROT_FILE;		  
	      tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = TRUE;
	      tr->initialPartitionData[modelNumber].dataType   = AA_DATA;
	      analyzeRest = FALSE;
	    }
	  else
	    {
	      int 
		newIndex = 0;
	      	      
	      strcpy(tr->initialPartitionData[modelNumber].ascFileName, fileName); 
	      
	      i = 0;
	      
	      while(ident[i] != ',')
		{
		  if(ident[i] == '\0')
		    {
		      printf("Expecting two commas in string %s\n", ident);
		      exit(-1);
		    }
		  i++;
		}
	      
	      i++;	     

	      while(ident[i] != ',')
		{
		  if(ident[i] == '\0')
		    {
		      printf("Expecting two commas in string %s\n", ident);
		      exit(-1);
		    }
		  model[newIndex] = ident[i];
		  i++;
		  newIndex++;
		}
		    	     
	      model[newIndex] = '\0';
	    }	 
	}
      
      if(analyzeRest)
	{
	  /* AA */
	  tr->initialPartitionData[modelNumber].ascBias = FALSE;
	   
	  for(i = 0; i < NUM_PROT_MODELS && !found; i++)
	    {	
	      strcpy(thisModel, protModels[i]);
	      
	      if(strcasecmp(model, thisModel) == 0)
		{	      	      
		  tr->initialPartitionData[modelNumber].protModels = i;		  
		  tr->initialPartitionData[modelNumber].usePredefinedProtFreqs = TRUE;
		  tr->initialPartitionData[modelNumber].dataType   = AA_DATA;		  
		  found = TRUE;
		}
	      
	      if(!found)
		{
		  if(i != GTR && i != GTR_UNLINKED)
		    {
		      strcpy(thisModel, protModels[i]);
		      strcat(thisModel, "F");
		      
		      if(strcasecmp(model, thisModel) == 0)
			{	      
			  tr->initialPartitionData[modelNumber].protModels = i;		  
			  tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = FALSE;
			  tr->initialPartitionData[modelNumber].dataType   = AA_DATA;		     
			  found = TRUE;

			  if(tr->initialPartitionData[modelNumber].protModels == AUTO)
			    {
			      printf("\nError: Option AUTOF has been deprecated, exiting\n\n");
			      errorExit(-1);
			    }
			}

		     
		    }
		}
	      
	      
	      if(!found)
		{		  
		  strcpy(thisModel, protModels[i]);
		  strcat(thisModel, "X");
		      
		  if(strcasecmp(model, thisModel) == 0)
		    {	      
		      tr->initialPartitionData[modelNumber].protModels = i;		  
		      tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = FALSE;
		      tr->initialPartitionData[modelNumber].dataType   = AA_DATA;		     
		      tr->initialPartitionData[modelNumber].optimizeBaseFrequencies = TRUE;
		      found = TRUE;
		      
		      if(tr->initialPartitionData[modelNumber].protModels == AUTO)
			{
			  printf("\nError: Option AUTOX has been deprecated, exiting\n\n");
			  errorExit(-1);
			}
		    }			  

		 
		}
	      

	      if(found && (tr->initialPartitionData[modelNumber].protModels == GTR || tr->initialPartitionData[modelNumber].protModels == GTR_UNLINKED))
		tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = FALSE;		    		
	    }

	   /* AA with Asc bias*/
	  
	  if(!found)
	    {
	      for(i = 0; i < NUM_PROT_MODELS && !found; i++)
		{	
		  strcpy(thisModel, "ASC_");
		  strcat(thisModel, protModels[i]);
		  
		  if(strcasecmp(model, thisModel) == 0)
		    {	      	      
		      tr->initialPartitionData[modelNumber].protModels = i;		  
		      tr->initialPartitionData[modelNumber].usePredefinedProtFreqs = TRUE;
		      tr->initialPartitionData[modelNumber].dataType   = AA_DATA;		  
		      found = TRUE;
		    }
		  
		  if(!found)
		    {
		      if(i != GTR && i != GTR_UNLINKED)
			{
			  strcpy(thisModel, protModels[i]);
			  strcat(thisModel, "F");
			  
			  if(strcasecmp(model, thisModel) == 0)
			    {	      
			      tr->initialPartitionData[modelNumber].protModels = i;		  
			      tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = FALSE;
			      tr->initialPartitionData[modelNumber].dataType   = AA_DATA;		     
			      found = TRUE;
			    }
			}
		    }
		  
	      
		  if(!found)
		    {		  
		      strcpy(thisModel, protModels[i]);
		      strcat(thisModel, "X");
		      
		      if(strcasecmp(model, thisModel) == 0)
			{	      
			  tr->initialPartitionData[modelNumber].protModels = i;		  
			  tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = FALSE;
			  tr->initialPartitionData[modelNumber].dataType   = AA_DATA;		     
			  tr->initialPartitionData[modelNumber].optimizeBaseFrequencies = TRUE;
			  found = TRUE;
			}		
		    }
		  
		  if(found)
		    tr->initialPartitionData[modelNumber].ascBias = TRUE;

		  if(found && (tr->initialPartitionData[modelNumber].protModels == GTR || tr->initialPartitionData[modelNumber].protModels == GTR_UNLINKED))
		    tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = FALSE;		    		
		}
	    }
	  

	  
	  if(!found)
	    {		  	  
	      if(strcasecmp(model, "DNA") == 0 || strcasecmp(model, "DNAX") == 0 || strcasecmp(model, "ASC_DNA") == 0 || strcasecmp(model, "ASC_DNAX") == 0)
		{	     	      
		  tr->initialPartitionData[modelNumber].protModels = -1;		  
		  tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = FALSE;
		  tr->initialPartitionData[modelNumber].dataType   = DNA_DATA;

		  if(strcasecmp(model, "DNAX") == 0 || strcasecmp(model, "ASC_DNAX") == 0)
		    {
		      if(strcasecmp(model, "ASC_DNAX") == 0)
			tr->initialPartitionData[modelNumber].ascBias = TRUE;
		      else
			tr->initialPartitionData[modelNumber].ascBias = FALSE;
		      tr->initialPartitionData[modelNumber].optimizeBaseFrequencies = TRUE;
		    }
		  else
		    {
		      if(strcasecmp(model, "ASC_DNA") == 0)
			tr->initialPartitionData[modelNumber].ascBias = TRUE;
		      else
			tr->initialPartitionData[modelNumber].ascBias = FALSE;
		      tr->initialPartitionData[modelNumber].optimizeBaseFrequencies = FALSE;
		    }
		      
		  
		  found = TRUE;
		}	  
	      else
		{	    	  
		  if(strcasecmp(model, "BIN") == 0 || strcasecmp(model, "BINX") == 0 || strcasecmp(model, "ASC_BIN") == 0 || strcasecmp(model, "ASC_BINX") == 0)
		    {	     	      
		      tr->initialPartitionData[modelNumber].protModels = -1;		  
		      tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = FALSE;
		      tr->initialPartitionData[modelNumber].dataType   = BINARY_DATA;
		      
		      if(strcasecmp(model, "BINX") == 0 || strcasecmp(model, "ASC_BINX") == 0)
			{
			  if(strcasecmp(model, "ASC_BINX") == 0)
			    tr->initialPartitionData[modelNumber].ascBias = TRUE;
			  else
			    tr->initialPartitionData[modelNumber].ascBias = FALSE;
			  tr->initialPartitionData[modelNumber].optimizeBaseFrequencies = TRUE;
			}
		      else
			{
			  if(strcasecmp(model, "ASC_BIN") == 0)
			    tr->initialPartitionData[modelNumber].ascBias = TRUE;
			  else
			    tr->initialPartitionData[modelNumber].ascBias = FALSE;
			  tr->initialPartitionData[modelNumber].optimizeBaseFrequencies = FALSE;
			}
		      
		      found = TRUE;
		    }
		  else
		    {
		      if(strcasecmp(model, "MULTI") == 0 || strcasecmp(model, "MULTIX") == 0 || strcasecmp(model, "ASC_MULTI") == 0 || strcasecmp(model, "ASC_MULTIX") == 0)
			{	     	      
			  tr->initialPartitionData[modelNumber].protModels = -1;		  
			  tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = FALSE;
			  tr->initialPartitionData[modelNumber].dataType   = GENERIC_32;

			  if(strcasecmp(model, "MULTIX") == 0 || strcasecmp(model, "ASC_MULTIX") == 0)
			    {
			      if(strcasecmp(model, "ASC_MULTIX") == 0)
				tr->initialPartitionData[modelNumber].ascBias = TRUE;
			      else
				tr->initialPartitionData[modelNumber].ascBias = FALSE;
			      tr->initialPartitionData[modelNumber].optimizeBaseFrequencies = TRUE;
			    }
			  else
			    {
			      if(strcasecmp(model, "ASC_MULTI") == 0)
				tr->initialPartitionData[modelNumber].ascBias = TRUE;
			      else
				tr->initialPartitionData[modelNumber].ascBias = FALSE;
			      tr->initialPartitionData[modelNumber].optimizeBaseFrequencies = FALSE;
			    }
			  
			  found = TRUE;
			}
		      else
			{
			  if(strcasecmp(model, "CODON") == 0 || strcasecmp(model, "CODONX") == 0 || strcasecmp(model, "ASC_CODON") == 0 || strcasecmp(model, "ASC_CODONX") == 0)
			    {	     	      
			      tr->initialPartitionData[modelNumber].protModels = -1;		  
			      tr->initialPartitionData[modelNumber].usePredefinedProtFreqs  = FALSE;
			      tr->initialPartitionData[modelNumber].dataType   = GENERIC_64;
			      
			      if(strcasecmp(model, "CODONX") == 0 || strcasecmp(model, "ASC_CODONX") == 0)
				{
				  if(strcasecmp(model, "ASC_CODONX") == 0)
				    tr->initialPartitionData[modelNumber].ascBias = TRUE;
				  else
				    tr->initialPartitionData[modelNumber].ascBias = FALSE;
				  tr->initialPartitionData[modelNumber].optimizeBaseFrequencies = TRUE;
				}
			      else
				{
				  if(strcasecmp(model, "ASC_CODON") == 0)
				    tr->initialPartitionData[modelNumber].ascBias = TRUE;
				  else
				    tr->initialPartitionData[modelNumber].ascBias = FALSE;
				  tr->initialPartitionData[modelNumber].optimizeBaseFrequencies = FALSE;
				}
			     
			      
			      found = TRUE;
			    }
			}
		    }
		  
		}
	    }

	  if(!found)
	    {
	      printf("ERROR: you specified the unknown model %s for partition %d\n", model, modelNumber);
	      exit(-1);
	    }
	}
	  
      i = 0;
      while(ident[i++] != ',');      
	  
      tr->initialPartitionData[modelNumber].partitionName = (char*)rax_malloc((n - i + 1) * sizeof(char));          
	  
      j = 0;
      while(i < n)	
	tr->initialPartitionData[modelNumber].partitionName[j++] =  ident[i++];
      
      tr->initialPartitionData[modelNumber].partitionName[j] = '\0';                      
    }
}
void parsePartitions(analdef *adef, rawdata *rdta, tree *tr)
{
  FILE *f; 
  int numberOfModels = 0; 
  int nbytes = 0;
  char *ch;
  char *cc = (char *)NULL;
  char **p_names;
  int n, i, l;
  int lower, upper, modulo;
  char buf[256];
  int **partitions;
  int pairsCount;
  int as, j;
  int k; 

  f = myfopen(modelFileName, "rb");   

 
  while(myGetline(&cc, &nbytes, f) > -1)
    {     
      if(!lineContainsOnlyWhiteChars(cc))
	{
	  numberOfModels++;
	}
      if(cc)
	rax_free(cc);
      cc = (char *)NULL;
    }     
  
  rewind(f);
      
  p_names = (char **)rax_malloc(sizeof(char *) * numberOfModels);
  partitions = (int **)rax_malloc(sizeof(int *) * numberOfModels);
      
 
  
  tr->initialPartitionData = (pInfo*)rax_malloc(sizeof(pInfo) * numberOfModels);

      
  for(i = 0; i < numberOfModels; i++) 
    {     
      tr->initialPartitionData[i].protModels = adef->proteinMatrix;
      tr->initialPartitionData[i].usePredefinedProtFreqs  = adef->protEmpiricalFreqs;
      tr->initialPartitionData[i].optimizeBaseFrequencies = FALSE;
      tr->initialPartitionData[i].dataType   = -1;
    }

  for(i = 0; i < numberOfModels; i++)    
    partitions[i] = (int *)NULL;
    
  i = 0;
  while(myGetline(&cc, &nbytes, f) > -1)
    {          
      if(!lineContainsOnlyWhiteChars(cc))
	{
	  n = strlen(cc);	 
	  p_names[i] = (char *)rax_malloc(sizeof(char) * (n + 1));
	  strcpy(&(p_names[i][0]), cc);	 
	  i++;
	}
      if(cc)
	rax_free(cc);
      cc = (char *)NULL;
    }         

  

  for(i = 0; i < numberOfModels; i++)
    {         
     
      ch = p_names[i];     
      pairsCount = 0;
      skipWhites(&ch);
      
      if(*ch == '=')
	{
	  printf("Identifier missing prior to '=' in %s\n", p_names[i]);
	  exit(-1);
	}
      
      analyzeIdentifier(&ch, i, tr);
      ch++;
            
    numberPairs:
      pairsCount++;
      partitions[i] = (int *)rax_realloc((void *)partitions[i], (1 + 3 * pairsCount) * sizeof(int), FALSE);
      partitions[i][0] = pairsCount;
      partitions[i][3 + 3 * (pairsCount - 1)] = -1; 	
      
      skipWhites(&ch);
      
      if(!isNum(*ch))
	{
	  printf("%c Number expected in %s\n", *ch, p_names[i]);
	  exit(-1);
	}   
      
      l = 0;
      while(isNum(*ch))		 
	{
	  /*printf("%c", *ch);*/
	  buf[l] = *ch;
	  ch++;	
	  l++;
	}
      buf[l] = '\0';
      lower = atoi(buf);
      partitions[i][1 + 3 * (pairsCount - 1)] = lower;   
      
      skipWhites(&ch);
      
      /* NEW */
      
      if((*ch != '-') && (*ch != ','))
	{
	  if(*ch == '\0' || *ch == '\n' || *ch == '\r')
	    {
	      upper = lower;
	      goto SINGLE_NUMBER;
	    }
	  else
	    {
	      printf("'-' or ',' expected in %s\n", p_names[i]);
	      exit(-1);
	    }
	}	 
      
      if(*ch == ',')
	{	     
	  upper = lower;
	  goto SINGLE_NUMBER;
	}
      
      /* END NEW */
      
      ch++;   
      
      skipWhites(&ch);
      
      if(!isNum(*ch))
	{
	  printf("%c Number expected in %s\n", *ch, p_names[i]);
	  exit(-1);
	}    
      
      l = 0;
      while(isNum(*ch))
	{    
	  buf[l] = *ch;
	  ch++;	
	  l++;
	}
      buf[l] = '\0';
      upper = atoi(buf);     
    SINGLE_NUMBER:
      partitions[i][2 + 3 * (pairsCount - 1)] = upper;        	  
      
      if(upper < lower)
	{
	  printf("Upper bound %d smaller than lower bound %d for this partition: %s\n", upper, lower,  p_names[i]);
	  exit(-1);
	}
      
      skipWhites(&ch);
      
      if(*ch == '\0' || *ch == '\n' || *ch == '\r') /* PC-LINEBREAK*/
	{    
	  goto parsed;
	}
      
      if(*ch == ',')
	{	 
	  ch++;
	  goto numberPairs;
	}
      
      if(*ch == '\\')
	{
	  ch++;
	  skipWhites(&ch);
	  
	  if(!isNum(*ch))
	    {
	      printf("%c Number expected in %s\n", *ch, p_names[i]);
	      exit(-1);
	    }     
	  
	   if(adef->compressPatterns == FALSE)
	    {
	      printf("\nError: You are not allowed to use interleaved partitions, that is, assign non-contiguous sites\n");
	      printf("to the same partition model, when pattern compression is disabled via the -H flag,\n");
	      printf("or when pattern compression is disabled implicitely by some other option that requires it!\n\n");
	      exit(-1);
	    }

	  l = 0;
	  while(isNum(*ch))
	    {
	      buf[l] = *ch;
	      ch++;	
	      l++;
	    }
	  buf[l] = '\0';
	  modulo = atoi(buf);      
	  partitions[i][3 + 3 * (pairsCount - 1)] = modulo; 	
	  
	  skipWhites(&ch);
	  if(*ch == '\0' || *ch == '\n' || *ch == '\r')
	    {	     
	      goto parsed;
	    }
	  if(*ch == ',')
	    {	       
	      ch++;
	      goto numberPairs;
	    }
	}  
      
      if(*ch == '/')
	{
	  printf("\nRAxML detected the character \"/\" in your partition file.\n");
	  printf("Did you mean to write something similar to this: \"DNA, p1=1-100\\3\" ?\n");
	  printf("It's actually a backslash, not a slash, the program will exit now with an error!\n\n");
	}    
      else
	{ 	 
	  printf("\nRAxML detected the character \"%c\" in your partition file,\n", *ch);
	  printf("while it does not belong there!\n");
	  printf("\nAre you sure that your partition file complies with the RAxML partition file format?\n");
	  printf("\nActually reading the manual, does indeed do help a lot\n\n");
	  printf("The program will exit now with an error!\n\n");
	}

      printf("The problematic line in your partition file is this one here:\n\n");
	
      printf("%s\n\n", p_names[i]);

      assert(0);
       
    parsed:
      ;
    }
  
  fclose(f);
 
  /*********************************************************************************************************************/ 

  for(i = 0; i <= rdta->sites; i++)
    tr->model[i] = -1;
  
  for(i = 0; i < numberOfModels; i++)
    {   
      as = partitions[i][0];     
      
      for(j = 0; j < as; j++)
	{
	  lower = partitions[i][1 + j * 3];
	  upper = partitions[i][2 + j * 3]; 
	  modulo = partitions[i][3 + j * 3];	
	 
	  if(modulo == -1)
	    {
	      for(k = lower; k <= upper; k++)
		setModel(i, k, tr->model);
	    }
	  else
	    {
	      for(k = lower; k <= upper; k += modulo)
		{
		  if(k <= rdta->sites)
		    setModel(i, k, tr->model);	      
		}
	    }
	}        
    }


  for(i = 1; i < rdta->sites + 1; i++)
    {
      
      if(tr->model[i] == -1)
	{
	  printf("ERROR: Alignment Position %d has not been assigned any model\n", i);
	  exit(-1);
	}      
    }  

  for(i = 0; i < numberOfModels; i++)
    {
      rax_free(partitions[i]);
      rax_free(p_names[i]);
    }
  
  rax_free(partitions);
  rax_free(p_names);    
    
  tr->NumberOfModels = numberOfModels;     
  
  if(adef->perGeneBranchLengths)
    {
      if(tr->NumberOfModels > NUM_BRANCHES)
	{
	  printf("You are trying to use %d partitioned models for an individual per-gene branch length estimate.\n", tr->NumberOfModels);
	  printf("Currently only %d are allowed to improve efficiency.\n", NUM_BRANCHES);
	  printf("\n");
	  printf("In order to change this please replace the line \"#define NUM_BRANCHES   %d\" in file \"axml.h\" \n", NUM_BRANCHES);
	  printf("by \"#define NUM_BRANCHES   %d\" and then re-compile RAxML.\n", tr->NumberOfModels);
	  exit(-1);
	}
      else
	{
	  tr->multiBranch = 1;
	  tr->numBranches = tr->NumberOfModels;
	}
    }
}
void parseSecondaryStructure(tree *tr, analdef *adef, int sites)
{
  if(adef->useSecondaryStructure)
    {
      FILE *f = myfopen(secondaryStructureFileName, "rb");

      int
	i,
	k,
	countCharacters = 0,
	ch,
	*characters,
	**brackets,
	opening,
	closing,
	depth,
	numberOfSymbols,
	numSecondaryColumns;      

      unsigned char bracketTypes[4][2] = {{'(', ')'}, {'<', '>'},  {'[', ']'},  {'{', '}'}};          

      numberOfSymbols = 4;     

      tr->secondaryStructureInput = (char*)rax_malloc(sizeof(char) * sites);

      while((ch = fgetc(f)) != EOF)
	{
	  if(ch == '(' || ch == ')' || ch == '<' || ch == '>' || ch == '[' || ch == ']' || ch == '{' || ch == '}' || ch == '.')
	    countCharacters++;
	  else
	    {
	      if(!whitechar(ch))
		{
		  printf("Secondary Structure file %s contains character %c at position %d\n", secondaryStructureFileName, ch, countCharacters + 1);
		  printf("Allowed Characters are \"( ) < > [ ] { } \" and \".\" \n");
		  errorExit(-1);
		}
	    }
	}
      
      if(countCharacters != sites)
	{
	  printf("Error: Alignment length is: %d, secondary structure file has length %d\n", sites, countCharacters);
	  errorExit(-1);
	}
    
      characters = (int*)rax_malloc(sizeof(int) * countCharacters); 

      brackets = (int **)rax_malloc(sizeof(int*) * numberOfSymbols);
      
      for(k = 0; k < numberOfSymbols; k++)	  
	brackets[k]   = (int*)rax_calloc(countCharacters, sizeof(int));

      rewind(f);

      countCharacters = 0;
      while((ch = fgetc(f)) != EOF)
	{
	  if(!whitechar(ch)) 
	    {
	      tr->secondaryStructureInput[countCharacters] = ch;
	      characters[countCharacters++] = ch;
	    }
	}
      
      assert(countCharacters == sites);

      for(k = 0; k < numberOfSymbols; k++)
	{
	  for(i = 0, opening = 0, closing = 0, depth = 0; i < countCharacters; i++)
	    {
	      if((characters[i] == bracketTypes[k][0] || characters[i] == bracketTypes[k][1]) && 
		 (tr->extendedDataVector[i+1] == AA_DATA || tr->extendedDataVector[i+1] == BINARY_DATA ||
		  tr->extendedDataVector[i+1] == GENERIC_32 || tr->extendedDataVector[i+1] == GENERIC_64))
		{
		  printf("Secondary Structure only for DNA character positions \n");
		  printf("I am at position %d of the secondary structure file and this is not part of a DNA partition\n", i+1);
		  errorExit(-1);
		}
	      
	      if(characters[i] == bracketTypes[k][0])
		{	      
		  depth++;
		  /*printf("%d %d\n", depth, i);*/
		  brackets[k][i] = depth;
		  opening++;
		}
	      if(characters[i] == bracketTypes[k][1])
		{	  
		  brackets[k][i] = depth; 
		  /*printf("%d %d\n", depth, i);  */
		  depth--;	
		  
		  closing++;
		}	  	  	  
	      
	      if(closing > opening)
		{
		  printf("at position %d there is a closing bracket too much\n", i+1);
		  errorExit(-1);
		}
	    }	

	  if(depth != 0)
	    {
	      printf("Problem: Depth: %d\n", depth);
	      printf("Your secondary structure file may be missing a closing or opening paraenthesis!\n");
	    }
	  assert(depth == 0);
	  
	  if(countCharacters != sites)
	    {
	      printf("Problem: sec chars: %d sites: %d\n",countCharacters, sites);
	      printf("The number of sites in the alignment does not match the length of the secondary structure file\n");
	    }
	  assert(countCharacters == sites);
	
      
	  if(closing != opening)
	    {
	      printf("Number of opening brackets %d should be equal to number of closing brackets %d\n", opening, closing);
	      errorExit(-1);
	    }
	}
      
      for(i = 0, numSecondaryColumns = 0; i < countCharacters; i++)
	{
	  int checkSum = 0;

	  for(k = 0; k < numberOfSymbols; k++)
	    {
	      if(brackets[k][i] > 0)
		{
		  checkSum++;
		  
		  switch(tr->secondaryStructureModel)
		    {
		    case SEC_16:
		    case SEC_16_A:
		    case SEC_16_B:
		    case SEC_16_C:
		    case SEC_16_D:
		    case SEC_16_E:
		    case SEC_16_F:
		    case SEC_16_I:
		    case SEC_16_J:
		    case SEC_16_K:
		      tr->extendedDataVector[i+1] = SECONDARY_DATA;
		      break;
		    case SEC_6_A:
		    case SEC_6_B:
		    case SEC_6_C:
		    case SEC_6_D:
		    case SEC_6_E:
		      tr->extendedDataVector[i+1] = SECONDARY_DATA_6;
		      break;
		    case SEC_7_A:
		    case SEC_7_B:
		    case SEC_7_C:
		    case SEC_7_D:
		    case SEC_7_E:
		    case SEC_7_F:
		      tr->extendedDataVector[i+1] = SECONDARY_DATA_7;
		      break;
		    default:
		      assert(0);
		    }
		 
		  numSecondaryColumns++;
		}
	    }
	  assert(checkSum <= 1);
	}
      
      assert(numSecondaryColumns % 2 == 0);
      
      /*printf("Number of secondary columns: %d merged columns: %d\n", numSecondaryColumns, numSecondaryColumns / 2);*/

      tr->numberOfSecondaryColumns = numSecondaryColumns;
      if(numSecondaryColumns > 0)
	{
	  int model = tr->NumberOfModels;
	  int countPairs;
	  pInfo *partBuffer = (pInfo*)rax_malloc(sizeof(pInfo) * tr->NumberOfModels);

	  for(i = 1; i <= sites; i++)
	    {
	      for(k = 0; k < numberOfSymbols; k++)
		{
		  if(brackets[k][i-1] > 0)
		    tr->model[i] = model;
		}

	    }

	  /* now make a copy of partition data */

	 
	  for(i = 0; i < tr->NumberOfModels; i++)
	    {
	      partBuffer[i].partitionName = (char*)rax_malloc((strlen(tr->extendedPartitionData[i].partitionName) + 1) * sizeof(char));
	      strcpy(partBuffer[i].partitionName, tr->extendedPartitionData[i].partitionName);
	      strcpy(partBuffer[i].proteinSubstitutionFileName, tr->extendedPartitionData[i].proteinSubstitutionFileName);
	      strcpy(partBuffer[i].ascFileName, tr->extendedPartitionData[i].ascFileName);
	      partBuffer[i].dataType =  tr->extendedPartitionData[i].dataType;
	      partBuffer[i].protModels =  tr->extendedPartitionData[i].protModels;
	      partBuffer[i].usePredefinedProtFreqs =  tr->extendedPartitionData[i].usePredefinedProtFreqs;	      
	      partBuffer[i].optimizeBaseFrequencies =  tr->extendedPartitionData[i].optimizeBaseFrequencies;
	    }

	  for(i = 0; i < tr->NumberOfModels; i++)
	    rax_free(tr->extendedPartitionData[i].partitionName);
	  rax_free(tr->extendedPartitionData);
	 
	  tr->extendedPartitionData = (pInfo*)rax_malloc(sizeof(pInfo) * (tr->NumberOfModels + 1));
	  
	  for(i = 0; i < tr->NumberOfModels; i++)
	    {
	      tr->extendedPartitionData[i].partitionName = (char*)rax_malloc((strlen(partBuffer[i].partitionName) + 1) * sizeof(char));
	      strcpy(tr->extendedPartitionData[i].partitionName, partBuffer[i].partitionName);
	      strcpy(tr->extendedPartitionData[i].proteinSubstitutionFileName, partBuffer[i].proteinSubstitutionFileName);
	      strcpy(tr->extendedPartitionData[i].ascFileName, partBuffer[i].ascFileName);
	      tr->extendedPartitionData[i].dataType =  partBuffer[i].dataType;
	      tr->extendedPartitionData[i].protModels= partBuffer[i].protModels;
	      tr->extendedPartitionData[i].usePredefinedProtFreqs=  partBuffer[i].usePredefinedProtFreqs;
	      tr->extendedPartitionData[i].optimizeBaseFrequencies =  partBuffer[i].optimizeBaseFrequencies;
	      rax_free(partBuffer[i].partitionName);
	    }
	  rax_free(partBuffer);

	  tr->extendedPartitionData[i].partitionName = (char*)rax_malloc(64 * sizeof(char));

	  switch(tr->secondaryStructureModel)
	    {
	    case SEC_16:
	    case SEC_16_A:
	    case SEC_16_B:
	    case SEC_16_C:
	    case SEC_16_D:
	    case SEC_16_E:
	    case SEC_16_F:
	    case SEC_16_I:
	    case SEC_16_J:
	    case SEC_16_K:
	      strcpy(tr->extendedPartitionData[i].partitionName, "SECONDARY STRUCTURE 16 STATE MODEL");
	      tr->extendedPartitionData[i].dataType = SECONDARY_DATA;
	      break;
	    case SEC_6_A:
	    case SEC_6_B:
	    case SEC_6_C:
	    case SEC_6_D:
	    case SEC_6_E:
	      strcpy(tr->extendedPartitionData[i].partitionName, "SECONDARY STRUCTURE 6 STATE MODEL");
	      tr->extendedPartitionData[i].dataType = SECONDARY_DATA_6;
	      break;
	    case SEC_7_A:
	    case SEC_7_B:
	    case SEC_7_C:
	    case SEC_7_D:
	    case SEC_7_E:
	    case SEC_7_F:
	      strcpy(tr->extendedPartitionData[i].partitionName, "SECONDARY STRUCTURE 7 STATE MODEL");
	      tr->extendedPartitionData[i].dataType = SECONDARY_DATA_7;
	      break;
	    default:
	      assert(0);
	    }

	  tr->extendedPartitionData[i].protModels= -1;
	  tr->extendedPartitionData[i].usePredefinedProtFreqs =  FALSE;	 

	  tr->NumberOfModels++;	 
	  
	  if(adef->perGeneBranchLengths)
	    {
	      if(tr->NumberOfModels > NUM_BRANCHES)
		{
		  printf("You are trying to use %d partitioned models for an individual per-gene branch length estimate.\n", tr->NumberOfModels);
		  printf("Currently only %d are allowed to improve efficiency.\n", NUM_BRANCHES);
		  printf("Note that the number of partitions has automatically been incremented by one to accommodate secondary structure models\n");
		  printf("\n");
		  printf("In order to change this please replace the line \"#define NUM_BRANCHES   %d\" in file \"axml.h\" \n", NUM_BRANCHES);
		  printf("by \"#define NUM_BRANCHES   %d\" and then re-compile RAxML.\n", tr->NumberOfModels);
		  exit(-1);
		}
	      else
		{
		  tr->multiBranch = 1;
		  tr->numBranches = tr->NumberOfModels;
		}
	    }
	  
	  assert(countCharacters == sites);

	  tr->secondaryStructurePairs = (int*)rax_malloc(sizeof(int) * countCharacters);
	  for(i = 0; i < countCharacters; i++)
	    tr->secondaryStructurePairs[i] = -1;
	  /*
	    for(i = 0; i < countCharacters; i++)
	    printf("%d", brackets[i]);
	    printf("\n");
	  */
	  countPairs = 0;

	  for(k = 0; k < numberOfSymbols; k++)
	    {
	      i = 0;
	      
	  
	      while(i < countCharacters)
		{
		  int 
		    j = i,
		    bracket = -1,
		    openBracket,
		    closeBracket;
		  
		  while(j < countCharacters && ((bracket = brackets[k][j]) == 0))
		    {
		      i++;
		      j++;
		    }

		  assert(bracket >= 0);

		  if(j == countCharacters)
		    {
		      assert(bracket == 0);
		      break;
		    }
	    
		  openBracket = j;
		  j++;
		  
		  while(bracket != brackets[k][j] && j < countCharacters)
		    j++;
		  assert(j < countCharacters);
		  closeBracket = j;
		  
		  assert(closeBracket < countCharacters && openBracket < countCharacters);

		  assert(brackets[k][closeBracket] > 0 && brackets[k][openBracket] > 0);
		  
		  /*printf("%d %d %d\n", openBracket, closeBracket, bracket);*/
		  brackets[k][closeBracket] = 0;
		  brackets[k][openBracket]  = 0;	      	      
		  countPairs++;
		  
		  tr->secondaryStructurePairs[closeBracket] = openBracket;
		  tr->secondaryStructurePairs[openBracket] = closeBracket;
		}

	      assert(i == countCharacters);
	    }
	     
	  assert(countPairs == numSecondaryColumns / 2);

	  
	  /*for(i = 0; i < countCharacters; i++)
	    printf("%d ", tr->secondaryStructurePairs[i]);
	    printf("\n");*/
	  

	  adef->useMultipleModel = TRUE;

	}

      
      for(k = 0; k < numberOfSymbols; k++)	  
	rax_free(brackets[k]);
      rax_free(brackets);
      rax_free(characters);

      fclose(f);
    }
}
Exemple #25
0
void doAllInOne(tree *tr, analdef *adef)
{
  int i, n, bestIndex, bootstrapsPerformed;

#ifdef _WAYNE_MPI
  int 
    bootStopTests = 1,
    j,
    bootStrapsPerProcess = 0;
#endif 

  double loopTime; 
  int      *originalRateCategories;
  int      *originalInvariant;
#ifdef _WAYNE_MPI
  int      slowSearches, fastEvery;
#else
  int      slowSearches, fastEvery = 5;
#endif
  int treeVectorLength = -1;
  topolRELL_LIST *rl;  
  double bestLH, mlTime, overallTime;  
  long radiusSeed = adef->rapidBoot;
  FILE *f;
  char bestTreeFileName[1024];  
  hashtable *h = (hashtable*)NULL;
  unsigned int **bitVectors = (unsigned int**)NULL;
  boolean bootStopIt = FALSE;
  double pearsonAverage = 0.0;
  pInfo *catParams         = allocParams(tr);
  pInfo *gammaParams = allocParams(tr);
  unsigned int vLength;

  n = adef->multipleRuns; 

#ifdef _WAYNE_MPI
  if(n % processes != 0)
    n = processes * ((n / processes) + 1);
#endif

  if(adef->bootStopping)
    {    
      h = initHashTable(tr->mxtips * 100);

      treeVectorLength = adef->multipleRuns;
      
      bitVectors = initBitVector(tr, &vLength);          
    }

  rl = (topolRELL_LIST *)rax_malloc(sizeof(topolRELL_LIST));
  initTL(rl, tr, n);
     
  originalRateCategories = (int*)rax_malloc(tr->cdta->endsite * sizeof(int));      
  originalInvariant      = (int*)rax_malloc(tr->cdta->endsite * sizeof(int));

             

  initModel(tr, tr->rdta, tr->cdta, adef);

  if(adef->grouping)
    printBothOpen("\n\nThe topologies of all Bootstrap and ML trees will adhere to the constraint tree specified in %s\n", tree_file);
  if(adef->constraint)
    printBothOpen("\n\nThe topologies of all Bootstrap and ML trees will adhere to the bifurcating backbone constraint tree specified in %s\n", tree_file);
 

#ifdef _WAYNE_MPI
  long parsimonySeed0 = adef->parsimonySeed;
  long replicateSeed0 = adef->rapidBoot;
  n = n / processes;
#endif
 
  for(i = 0; i < n && !bootStopIt; i++)
    {  
#ifdef _WAYNE_MPI
      j = i + n * processID;
      tr->treeID = j;
#else              
      tr->treeID = i;
#endif

      tr->checkPointCounter = 0;
        
      loopTime = gettime();  

#ifdef _WAYNE_MPI
      if(i == 0)
        {
          if(parsimonySeed0 != 0)
            adef->parsimonySeed = parsimonySeed0 + 10000 * processID;
          adef->rapidBoot = replicateSeed0 + 10000 * processID;
          radiusSeed = adef->rapidBoot;
        }
#endif          
     
      if(i % 10 == 0)
	{
	  if(i > 0)	    	    
	    reductionCleanup(tr, originalRateCategories, originalInvariant);	    	  

	  if(adef->grouping || adef->constraint)
	    {
	      FILE *f = myfopen(tree_file, "rb");	

	      assert(adef->restart);	      
	      if (! treeReadLenMULT(f, tr, adef))
		exit(-1);
	     
	      fclose(f);
	    }
	  else
	    makeParsimonyTree(tr, adef);
	  
	  tr->likelihood = unlikely;
	  if(i == 0)
	    {
	      double t;
	          
	      onlyInitrav(tr, tr->start);
	      treeEvaluate(tr, 1);	     	
	     	      
	      t = gettime();    	      

	      modOpt(tr, adef, FALSE, 5.0);	    
#ifdef _WAYNE_MPI
	      printBothOpen("\nTime for BS model parameter optimization on Process %d: %f seconds\n", processID, gettime() - t);	     
#else
	      printBothOpen("\nTime for BS model parameter optimization %f\n", gettime() - t);
#endif
	      
	      memcpy(originalRateCategories, tr->cdta->rateCategory, sizeof(int) * tr->cdta->endsite);
	      memcpy(originalInvariant,      tr->invariant,          sizeof(int) * tr->cdta->endsite);

	      if(adef->bootstrapBranchLengths)
		{
		  if(tr->rateHetModel == CAT)
		    {
		      copyParams(tr->NumberOfModels, catParams, tr->partitionData, tr);		      
		      assert(tr->cdta->endsite == tr->originalCrunchedLength);		 
		      catToGamma(tr, adef);		      
		      modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
		      copyParams(tr->NumberOfModels, gammaParams, tr->partitionData, tr);		      
		      gammaToCat(tr);
		      copyParams(tr->NumberOfModels, tr->partitionData, catParams, tr);		      
		    }
		  else
		    {		  
		      assert(tr->cdta->endsite == tr->originalCrunchedLength);		 		     		     		      		     
		    }
		}
	    }	  	  
	}

      computeNextReplicate(tr, &adef->rapidBoot, originalRateCategories, originalInvariant, TRUE, TRUE); 
      resetBranches(tr);

     

      evaluateGenericInitrav(tr, tr->start);
    
      treeEvaluate(tr, 1);    	             
     
      computeBOOTRAPID(tr, adef, &radiusSeed);  
#ifdef _WAYNE_MPI
      saveTL(rl, tr, j);
#else                      	  
      saveTL(rl, tr, i);
#endif

      if(adef->bootstrapBranchLengths)
	{
	  double 
	    lh = tr->likelihood;
	  	 
	  if(tr->rateHetModel == CAT)
	    {
	      copyParams(tr->NumberOfModels, tr->partitionData, gammaParams, tr);	      
	     
	      catToGamma(tr, adef);
	      
	      
	      resetBranches(tr);
	      onlyInitrav(tr, tr->start);
	      treeEvaluate(tr, 2.0);
	  
	     
	      gammaToCat(tr);
	     
	
	      copyParams(tr->NumberOfModels, tr->partitionData, catParams, tr);	      
	      tr->likelihood = lh;
	    }
	  else
	    {	     
	      treeEvaluate(tr, 2.0);
	      tr->likelihood = lh;
	    }
	}
      
      printBootstrapResult(tr, adef, TRUE); 

      loopTime = gettime() - loopTime; 
      writeInfoFile(adef, tr, loopTime); 
     
      if(adef->bootStopping)
#ifdef _WAYNE_MPI
	{
	  int 
	    nn = (i + 1) * processes;

	  if((nn > START_BSTOP_TEST) && 
	     (i * processes < FC_SPACING * bootStopTests) &&
	     ((i + 1) * processes >= FC_SPACING * bootStopTests)
	     )	     
	    {
	      MPI_Barrier(MPI_COMM_WORLD);
	                    
	      concatenateBSFiles(processes, bootstrapFileName);                
	      
              MPI_Barrier(MPI_COMM_WORLD);	      
	      
	      bootStopIt = computeBootStopMPI(tr, bootstrapFileName, adef, &pearsonAverage);
	      bootStopTests++;
	    }
	}	
#else	
      bootStopIt = bootStop(tr, h, i, &pearsonAverage, bitVectors, treeVectorLength, vLength, adef);
#endif


    }  
 
#ifdef _WAYNE_MPI      
  MPI_Barrier(MPI_COMM_WORLD);
  
  bootstrapsPerformed = i * processes; 
  bootStrapsPerProcess = i;   
      
  concatenateBSFiles(processes, bootstrapFileName);
  removeBSFiles(processes, bootstrapFileName);  
  
  MPI_Barrier(MPI_COMM_WORLD); 
#else
  bootstrapsPerformed = i;
#endif

  rax_freeParams(tr->NumberOfModels, catParams);
  rax_free(catParams);

  rax_freeParams(tr->NumberOfModels, gammaParams);
  rax_free(gammaParams);

  if(adef->bootStopping)
    {
      freeBitVectors(bitVectors, 2 * tr->mxtips);
      rax_free(bitVectors);
      freeHashTable(h);
      rax_free(h);      
    }

 
  {      
    double t;

    printBothOpenMPI("\n\n");
    
    if(adef->bootStopping)
      {
	if(bootStopIt)
	  {
	    switch(tr->bootStopCriterion)
	      {
	      case FREQUENCY_STOP:
		printBothOpenMPI("Stopped Rapid BS search after %d replicates with FC Bootstopping criterion\n", bootstrapsPerformed);
		printBothOpenMPI("Pearson Average of %d random splits: %f\n",BOOTSTOP_PERMUTATIONS , pearsonAverage);	      
		break;
	      case MR_STOP:
		printBothOpenMPI("Stopped Rapid BS search after %d replicates with MR-based Bootstopping criterion\n", bootstrapsPerformed);
		printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage);	     
		break;
	      case MRE_STOP:
		printBothOpenMPI("Stopped Rapid BS search after %d replicates with MRE-based Bootstopping criterion\n", bootstrapsPerformed);
		printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage);	     
		break;
	      case MRE_IGN_STOP:
		printBothOpenMPI("Stopped Rapid BS search after %d replicates with MRE_IGN-based Bootstopping criterion\n", bootstrapsPerformed);
		printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage);	     
		break;
	      default:
		assert(0);
	      }
	  }
	else
	  { 
	    switch(tr->bootStopCriterion)	     
	      {
	      case FREQUENCY_STOP:
		printBothOpenMPI("Rapid BS search did not converge after %d replicates with FC Bootstopping criterion\n", bootstrapsPerformed);
		printBothOpenMPI("Pearson Average of %d random splits: %f\n",BOOTSTOP_PERMUTATIONS , pearsonAverage);
		break;
	      case MR_STOP:
		printBothOpenMPI("Rapid BS search did not converge after %d replicates with MR-based Bootstopping criterion\n", bootstrapsPerformed);
		printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage);
		break;
	      case MRE_STOP:
		printBothOpenMPI("Rapid BS search did not converge after %d replicates with MRE-based Bootstopping criterion\n", bootstrapsPerformed);
		printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage);
		break;
	      case MRE_IGN_STOP:
		printBothOpenMPI("Rapid BS search did not converge after %d replicates with MR_IGN-based Bootstopping criterion\n", bootstrapsPerformed);
		printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage);
		break;
	      default:
		assert(0);
	      }
	  }
      }
    

    t = gettime() - masterTime;

    printBothOpenMPI("Overall Time for %d Rapid Bootstraps %f seconds\n", bootstrapsPerformed, t);     
    printBothOpenMPI("Average Time per Rapid Bootstrap %f seconds\n", (double)(t/((double)bootstrapsPerformed)));  
        
    if(!adef->allInOne)     
      {
	printBothOpenMPI("All %d bootstrapped trees written to: %s\n", bootstrapsPerformed, bootstrapFileName);

#ifdef _WAYNE_MPI      	 
	MPI_Finalize();
#endif
	exit(0);
      }
  }
 
  
  /* ML-search */ 

  mlTime = gettime();
  double t = mlTime;
  
  printBothOpenMPI("\nStarting ML Search ...\n\n"); 

  /***CLEAN UP reduction stuff */  

  reductionCleanup(tr, originalRateCategories, originalInvariant);  

  /****/     	   
  
#ifdef _WAYNE_MPI 
  restoreTL(rl, tr, n * processID); 
#else
  restoreTL(rl, tr, 0);
#endif

  resetBranches(tr);

  

  evaluateGenericInitrav(tr, tr->start);   

  

  modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);  

#ifdef _WAYNE_MPI
  
  if(bootstrapsPerformed <= 100)
    fastEvery = 5;
  else
    fastEvery = bootstrapsPerformed / 20;

  for(i = 0; i < bootstrapsPerformed; i++)
    rl->t[i]->likelihood = unlikely;

  for(i = 0; i < bootStrapsPerProcess; i++)
    {            
      j = i + n * processID;
    
      if(i % fastEvery == 0)
	{	 
	  restoreTL(rl, tr, j); 	 	    	   	
	  
	  resetBranches(tr);	 

	  evaluateGenericInitrav(tr, tr->start);
	  	  
	  treeEvaluate(tr, 1); 		 
	  	  
	  optimizeRAPID(tr, adef);	  			         	  
	  
	  saveTL(rl, tr, j);  
	}    
    }     
#else
  for(i = 0; i < bootstrapsPerformed; i++)
    {            
      rl->t[i]->likelihood = unlikely;
    
      if(i % fastEvery == 0)
	{
	 
	  
	  restoreTL(rl, tr, i); 	 	    	   	
	  
	  resetBranches(tr);	 

	  evaluateGenericInitrav(tr, tr->start);
	  	  
	  treeEvaluate(tr, 1); 		 
	  	  
	  optimizeRAPID(tr, adef);	  			         	  
	  
	 

	  saveTL(rl, tr, i); 	 
	}    
    }     
#endif
 
  printBothOpenMPI("Fast ML optimization finished\n\n"); 
  t = gettime() - t;
  
#ifdef _WAYNE_MPI
  printBothOpen("Fast ML search on Process %d: Time %f seconds\n\n", processID, t);
  j = n * processID;

  qsort(&(rl->t[j]), n, sizeof(topolRELL*), compareTopolRell);

  restoreTL(rl, tr, j);
#else
  printBothOpen("Fast ML search Time: %f seconds\n\n", t);
  qsort(&(rl->t[0]), bootstrapsPerformed, sizeof(topolRELL*), compareTopolRell);
       
  restoreTL(rl, tr, 0);
#endif
  t = gettime();
  
  resetBranches(tr);

  evaluateGenericInitrav(tr, tr->start);

  modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);     
  
  slowSearches = bootstrapsPerformed / 5;
  if(bootstrapsPerformed % 5 != 0)
    slowSearches++;

  slowSearches  = MIN(slowSearches, 10); 

#ifdef _WAYNE_MPI
   if(processes > 1)
    {
      if(slowSearches % processes == 0)
        slowSearches = slowSearches / processes;
      else
        slowSearches = (slowSearches / processes) + 1;
    }
   
   for(i = 0; i < slowSearches; i++)
    {           
      j = i + n * processID;
      restoreTL(rl, tr, j);     
      rl->t[j]->likelihood = unlikely;  
      
      evaluateGenericInitrav(tr, tr->start);

      treeEvaluate(tr, 1.0);   
      
      thoroughOptimization(tr, adef, rl, j); 
   }   
#else
  for(i = 0; i < slowSearches; i++)
    {           
      restoreTL(rl, tr, i);     
      rl->t[i]->likelihood = unlikely;  
      
      evaluateGenericInitrav(tr, tr->start);

      treeEvaluate(tr, 1.0);   
      
      thoroughOptimization(tr, adef, rl, i); 	 

   }
#endif
  
  

  /*************************************************************************************************************/  
  
  if(tr->rateHetModel == CAT) 
    {      
      catToGamma(tr, adef);    
      modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); 
    }

  bestIndex = -1;
  bestLH = unlikely;
    
#ifdef _WAYNE_MPI
  for(i = 0; i < slowSearches; i++)
    { 
      j = i + n * processID;
      restoreTL(rl, tr, j);
      resetBranches(tr);

      evaluateGenericInitrav(tr, tr->start);

      treeEvaluate(tr, 2);
      
      printBothOpen("Slow ML Search %d Likelihood: %f\n", j, tr->likelihood);
      
      if(tr->likelihood > bestLH)
	{
	  bestLH = tr->likelihood;
	  bestIndex = j;
	}
    }
  /*printf("processID = %d, bestIndex = %d; bestLH = %f\n", processID, bestIndex, bestLH);*/
#else
  for(i = 0; i < slowSearches; i++)
    { 
      restoreTL(rl, tr, i);
      resetBranches(tr);

      evaluateGenericInitrav(tr, tr->start);

      treeEvaluate(tr, 2);
      
      printBothOpen("Slow ML Search %d Likelihood: %f\n", i, tr->likelihood);
      
      if(tr->likelihood > bestLH)
	{
	  bestLH = tr->likelihood;
	  bestIndex = i;
	}
    }
#endif
  
  printBothOpenMPI("Slow ML optimization finished\n\n");

  t = gettime() - t;

#ifdef _WAYNE_MPI
  printBothOpen("Slow ML search on Process %d: Time %f seconds\n", processID, t);
#else
  printBothOpen("Slow ML search Time: %f seconds\n", t);
#endif
  
  t = gettime();
  
  restoreTL(rl, tr, bestIndex);
  resetBranches(tr);

  evaluateGenericInitrav(tr, tr->start);
 
  treeEvaluate(tr, 2); 
         
  Thorough = 1;
  tr->doCutoff = FALSE;  
	 
  treeOptimizeThorough(tr, 1, 10);
  evaluateGenericInitrav(tr, tr->start);
  
  modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
  t = gettime() - t;

#ifdef _WAYNE_MPI
  printBothOpen("Thorough ML search on Process %d: Time %f seconds\n", processID, t);
#else
  printBothOpen("Thorough ML search Time: %f seconds\n", t);
#endif

#ifdef _WAYNE_MPI
  bestLH = tr->likelihood;

  printf("\nprocessID = %d, bestLH = %f\n", processID,  bestLH);

  if(processes > 1)
    {
      double *buffer;
      int bestProcess;

      buffer = (double *)rax_malloc(sizeof(double) * processes);
      for(i = 0; i < processes; i++)
        buffer[i] = unlikely;
      buffer[processID] = bestLH;
      for(i = 0; i < processes; i++)
        MPI_Bcast(&buffer[i], 1, MPI_DOUBLE, i, MPI_COMM_WORLD);
      bestLH = buffer[0];
      bestProcess = 0;
      for(i = 1; i < processes; i++)
        if(buffer[i] > bestLH)
          {
             bestLH = buffer[i];
             bestProcess = i;
          }
      rax_free(buffer);

      if(processID != bestProcess)
        {
          MPI_Finalize();
          exit(0);
        }
    }
#endif

  printBothOpen("\nFinal ML Optimization Likelihood: %f\n", tr->likelihood);   
  printBothOpen("\nModel Information:\n\n");
  
  printModelParams(tr, adef);    
  
  strcpy(bestTreeFileName, workdir); 
  strcat(bestTreeFileName, "RAxML_bestTree.");
  strcat(bestTreeFileName,         run_id);
   
  Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, TRUE, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE);
  f = myfopen(bestTreeFileName, "wb");
  fprintf(f, "%s", tr->tree_string);
  fclose(f);

  if(adef->perGeneBranchLengths)
    printTreePerGene(tr, adef, bestTreeFileName, "w");

  
  overallTime = gettime() - masterTime;
  mlTime    = gettime() - mlTime;

  printBothOpen("\nML search took %f secs or %f hours\n", mlTime, mlTime / 3600.0); 
  printBothOpen("\nCombined Bootstrap and ML search took %f secs or %f hours\n", overallTime, overallTime / 3600.0);   
  printBothOpen("\nDrawing Bootstrap Support Values on best-scoring ML tree ...\n\n");
      
  
  freeTL(rl);   
  rax_free(rl);       
  
  calcBipartitions(tr, adef, bestTreeFileName, bootstrapFileName);    
  

  overallTime = gettime() - masterTime;

  printBothOpen("Program execution info written to %s\n", infoFileName);
  printBothOpen("All %d bootstrapped trees written to: %s\n\n", bootstrapsPerformed, bootstrapFileName);
  printBothOpen("Best-scoring ML tree written to: %s\n\n", bestTreeFileName);
  if(adef->perGeneBranchLengths && tr->NumberOfModels > 1)    
    printBothOpen("Per-Partition branch lengths of best-scoring ML tree written to %s.PARTITION.0 to  %s.PARTITION.%d\n\n", bestTreeFileName,  bestTreeFileName, 
		  tr->NumberOfModels - 1);    
  printBothOpen("Best-scoring ML tree with support values written to: %s\n\n", bipartitionsFileName);
  printBothOpen("Best-scoring ML tree with support values as branch labels written to: %s\n\n", bipartitionsFileNameBranchLabels);
  printBothOpen("Overall execution time for full ML analysis: %f secs or %f hours or %f days\n\n", overallTime, overallTime/3600.0, overallTime/86400.0);

#ifdef _WAYNE_MPI
  MPI_Finalize();
#endif      

  exit(0); 
}
Exemple #26
0
void doInference(tree *tr, analdef *adef, rawdata *rdta, cruncheddata *cdta)
{
  int i, n;

#ifdef _WAYNE_MPI
  int 
    j,
    bestProcess;
#endif

  double loopTime;
  topolRELL_LIST *rl = (topolRELL_LIST *)NULL; 
  int 
    best = -1,
    newBest = -1;
  double 
    bestLH = unlikely; 
  FILE *f;
  char bestTreeFileName[1024]; 
  double overallTime;

  n = adef->multipleRuns;
     
#ifdef _WAYNE_MPI
  if(n % processes != 0)
    n = processes * ((n / processes) + 1);
#endif 

  if(!tr->catOnly)
    {
      rl = (topolRELL_LIST *)rax_malloc(sizeof(topolRELL_LIST));
      initTL(rl, tr, n);
    }

#ifdef _WAYNE_MPI
  long parsimonySeed0 = adef->parsimonySeed;
  n = n / processes;
#endif

  if(adef->rellBootstrap)
    { 
#ifdef _WAYNE_MPI          
      tr->resample = permutationSH(tr, NUM_RELL_BOOTSTRAPS, parsimonySeed0 + 10000 * processID); 
#else     
      tr->resample = permutationSH(tr, NUM_RELL_BOOTSTRAPS, adef->parsimonySeed);        
#endif

       tr->rellTrees = (treeList *)rax_malloc(sizeof(treeList));
       initTreeList(tr->rellTrees, tr, NUM_RELL_BOOTSTRAPS);
    }
  else
    {
      tr->resample = (int *)NULL;
      tr->rellTrees =  (treeList *)NULL;
    }

  for(i = 0; i < n; i++)
    { 
#ifdef _WAYNE_MPI 
      if(i == 0)
        { 
          if(parsimonySeed0 != 0) 
            adef->parsimonySeed = parsimonySeed0 + 10000 * processID;
        }
      j = i + n * processID;
      tr->treeID = j;
#else    
      tr->treeID = i;
#endif

      tr->checkPointCounter = 0;
         
      loopTime = gettime();
                                             
      initModel(tr, rdta, cdta, adef); 

      if(i == 0)
	printBaseFrequencies(tr);
     
      getStartingTree(tr, adef); 
                       
      computeBIGRAPID(tr, adef, TRUE);  

#ifdef _WAYNE_MPI
      if(tr->likelihood > bestLH)
	{
	  best = j;
	  bestLH = tr->likelihood;
	}

      if(!tr->catOnly)
	saveTL(rl, tr, j);
#else
      if(tr->likelihood > bestLH)
	{
	  best = i;
	  bestLH = tr->likelihood;
	}

      if(!tr->catOnly)
	saveTL(rl, tr, i);
#endif

      loopTime = gettime() - loopTime; 
      writeInfoFile(adef, tr, loopTime);
     
    }     
 
  assert(best >= 0);

#ifdef _WAYNE_MPI
  MPI_Barrier(MPI_COMM_WORLD);
  n = n * processes;
#endif

  if(tr->catOnly)
    {
      printBothOpenMPI("\n\nNOT conducting any final model optimizations on all %d trees under CAT-based model ....\n", n);
      printBothOpenMPI("\nREMEMBER that CAT-based likelihood scores are meaningless!\n\n", n);        
#ifdef _WAYNE_MPI
      if(processID != 0)
        {
          MPI_Finalize();
          exit(0);
        }
#endif
    }
  else
    {
      printBothOpenMPI("\n\nConducting final model optimizations on all %d trees under GAMMA-based models ....\n\n", n);
 
#ifdef _WAYNE_MPI
      n = n / processes;
#endif

      if(tr->rateHetModel == GAMMA ||  tr->rateHetModel == GAMMA_I)
	{
	  restoreTL(rl, tr, best);
	  evaluateGenericInitrav(tr, tr->start);
	  if(!adef->useBinaryModelFile)
	    modOpt(tr, adef, FALSE, adef->likelihoodEpsilon); 
	  else
	    {
	      readBinaryModel(tr, adef);
	      evaluateGenericInitrav(tr, tr->start);
	      treeEvaluate(tr, 2);
	    }
	  bestLH = tr->likelihood;
	  tr->likelihoods[best] = tr->likelihood;
	  saveTL(rl, tr, best);
	  tr->treeID = best; 
	  printResult(tr, adef, TRUE);
	  newBest = best;      
	  
	  for(i = 0; i < n; i++)
	    {
#ifdef _WAYNE_MPI
	      j = i + n * processID;
	      if(j != best)
		{
		  restoreTL(rl, tr, j);
		  evaluateGenericInitrav(tr, tr->start);
		  treeEvaluate(tr, 1);
		  tr->likelihoods[j] = tr->likelihood;
		  
		  if(tr->likelihood > bestLH)
		    {
		      newBest = j;
		      bestLH = tr->likelihood;		  
		      saveTL(rl, tr, j);
		    }
		  tr->treeID = j;
		  printResult(tr, adef, TRUE);
		}
	      if(n == 1 && processes == 1)
		printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s\n", i, tr->likelihoods[i], resultFileName);	   
	      else	    
		printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s.RUN.%d\n", j, tr->likelihoods[j], resultFileName, j);
#else	  
	      if(i != best)
		{
		  restoreTL(rl, tr, i);
		  evaluateGenericInitrav(tr, tr->start);
		  treeEvaluate(tr, 1);
		  tr->likelihoods[i] = tr->likelihood;
		  
		  if(tr->likelihood > bestLH)
		    {
		      newBest = i;
		      bestLH = tr->likelihood;		  
		      saveTL(rl, tr, i);
		    }
		  tr->treeID = i;
		  printResult(tr, adef, TRUE);
		}

	      
	      if(n == 1)
		printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s\n", i, tr->likelihoods[i], resultFileName);	   
	      else	    
		printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s.RUN.%d\n", i, tr->likelihoods[i], resultFileName, i);
#endif	    	 
	    }    
	}
      else
	{     
	  catToGamma(tr, adef);
	  
#ifdef _WAYNE_MPI
	  for(i = 0; i < n; i++)
            {
              j = i + n*processID;
	      rl->t[j]->likelihood = unlikely;
            }  
#else
	  for(i = 0; i < n; i++)
	    rl->t[i]->likelihood = unlikely;
#endif
	  
	  initModel(tr, rdta, cdta, adef);
	  
	  restoreTL(rl, tr, best);      
	  
	  resetBranches(tr);
	  evaluateGenericInitrav(tr, tr->start);
	  modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);      
	  tr->likelihoods[best] = tr->likelihood;
	  bestLH = tr->likelihood;     
	  saveTL(rl, tr, best);
	  tr->treeID = best;
	  printResult(tr, adef, TRUE);
	  newBest = best;
	  
	  for(i = 0; i < n; i++)
	    {
#ifdef _WAYNE_MPI
	      j = i + n*processID;
	      if(j != best)
		{
		  restoreTL(rl, tr, j);	    
		  resetBranches(tr);
		  evaluateGenericInitrav(tr, tr->start);
		  treeEvaluate(tr, 2);
		  tr->likelihoods[j] = tr->likelihood;
		  
		  if(tr->likelihood > bestLH)
		    { 
		      newBest = j;
		      bestLH = tr->likelihood;		
		      saveTL(rl, tr, j);	  
		    }
		  tr->treeID = j;
		  printResult(tr, adef, TRUE);
		} 
	      
	      if(n == 1 && processes == 1)	    
		printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s\n", i, tr->likelihoods[i], resultFileName);
	      else
		printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s.RUN.%d\n", j, tr->likelihoods[j], resultFileName, j);
#else
	      if(i != best)
		{
		  restoreTL(rl, tr, i);	    
		  resetBranches(tr);
		  evaluateGenericInitrav(tr, tr->start);
		  treeEvaluate(tr, 2);
		  tr->likelihoods[i] = tr->likelihood;
		  
		  if(tr->likelihood > bestLH)
		    { 
		      newBest = i;
		      bestLH = tr->likelihood;		
		      saveTL(rl, tr, i);	  
		    }
		  tr->treeID = i;
		  printResult(tr, adef, TRUE);
		} 
	      
	      if(n == 1)	    
		printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s\n", i, tr->likelihoods[i], resultFileName);
	      else
		printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s.RUN.%d\n", i, tr->likelihoods[i], resultFileName, i);	   	  
#endif
	    }
	}     
    
      assert(newBest >= 0);

#ifdef _WAYNE_MPI
      if(processes > 1)
	{
	  double 
	    *buffer = (double *)rax_malloc(sizeof(double) * processes);
	  for(i = 0; i < processes; i++)
	    buffer[i] = unlikely;
	  buffer[processID] = bestLH;
	  for(i = 0; i < processes; i++)
	    MPI_Bcast(&buffer[i], 1, MPI_DOUBLE, i, MPI_COMM_WORLD);
	  bestLH = buffer[0];
	  bestProcess = 0;
	  for(i = 1; i < processes; i++)
	    if(buffer[i] > bestLH)
	      {
		bestLH = buffer[i];
		bestProcess = i;
	      }
	  
	  rax_free(buffer);	  	  
	}

      if(processID == bestProcess)
	{
#endif

	  restoreTL(rl, tr, newBest);
	  evaluateGenericInitrav(tr, tr->start);
     
	  printBothOpen("\n\nStarting final GAMMA-based thorough Optimization on tree %d likelihood %f .... \n\n", newBest, tr->likelihoods[newBest]);
	  
	  Thorough = 1;
	  tr->doCutoff = FALSE; 
	  treeOptimizeThorough(tr, 1, 10); 
	  evaluateGenericInitrav(tr, tr->start);
	  
	  printBothOpen("Final GAMMA-based Score of best tree %f\n\n", tr->likelihood); 
	  
	  
	  strcpy(bestTreeFileName, workdir); 
	  strcat(bestTreeFileName, "RAxML_bestTree.");
	  strcat(bestTreeFileName,         run_id);
	  
	  
	  Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, TRUE, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE);
	  
	  f = myfopen(bestTreeFileName, "wb");
	  fprintf(f, "%s", tr->tree_string);
	  fclose(f);
	  
	  if(adef->perGeneBranchLengths)
	    printTreePerGene(tr, adef, bestTreeFileName, "w");      
#ifdef _WAYNE_MPI
	}
#endif
    }

  if(adef->rellBootstrap)
    {
      //WARNING the functions below need to be invoked after all other trees have been printed
      //don't move this part of the code further up!

      int
	i;
#ifdef _WAYNE_MPI
      FILE 
	*f = myfopen(rellBootstrapFileNamePID, "wb");      
#else
      FILE 
	*f = myfopen(rellBootstrapFileName, "wb");
#endif
      
      for(i = 0; i < NUM_RELL_BOOTSTRAPS; i++)
	{
	   restoreTreeList(tr->rellTrees, tr, i);
	   Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, TRUE, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE);
	   fprintf(f, "%s", tr->tree_string);
	}

      freeTreeList(tr->rellTrees);
      rax_free(tr->rellTrees);
      rax_free(tr->resample);

      fclose(f);

#ifdef _WAYNE_MPI      
      MPI_Barrier(MPI_COMM_WORLD);      
      
      concatenateBSFiles(processes, rellBootstrapFileName);
      removeBSFiles(processes, rellBootstrapFileName);  
      
      MPI_Barrier(MPI_COMM_WORLD); 
      
      if(processID == 0)
	printBothOpen("\nRELL bootstraps written to file %s\n", rellBootstrapFileName);
#else
      printBothOpen("\nRELL bootstraps written to file %s\n", rellBootstrapFileName);    
#endif
    }
  
#ifdef _WAYNE_MPI 
  if(processID == bestProcess)
    {
#endif
      overallTime = gettime() - masterTime;
      
      printBothOpen("Program execution info written to %s\n", infoFileName);
      
      if(!tr->catOnly)
	{
	  printBothOpen("Best-scoring ML tree written to: %s\n\n", bestTreeFileName);
	  
	  if(adef->perGeneBranchLengths && tr->NumberOfModels > 1)    
	    printBothOpen("Per-Partition branch lengths of best-scoring ML tree written to %s.PARTITION.0 to  %s.PARTITION.%d\n\n", bestTreeFileName,  bestTreeFileName, 
			  tr->NumberOfModels - 1);  
	}
      
      printBothOpen("Overall execution time: %f secs or %f hours or %f days\n\n", overallTime, overallTime/3600.0, overallTime/86400.0);    
#ifdef _WAYNE_MPI 
    }
#endif

  if(!tr->catOnly)
    {
      freeTL(rl);   
      rax_free(rl); 
    }
  
#ifdef _WAYNE_MPI
  MPI_Finalize();
#endif
  exit(0);
}
static double evaluatePartialGTRCAT(int i, double ki, int counter,  traversalInfo *ti, double qz,
				    int w, double *EIGN, double *EI, double *EV,
				    double *tipVector, unsigned  char **yVector, 
				    int branchReference, int mxtips, double* expVector)
{
  double lz, term;       
  double  d[3];
  double   *x1, *x2; 
  int scale = 0, k;
  double *lVector = (double *)rax_malloc(sizeof(double) * 4 * mxtips);    

  traversalInfo *trav = &ti[0];
 
  assert(isTip(trav->pNumber, mxtips));
     
  x1 = &(tipVector[4 *  yVector[trav->pNumber][i]]);   

  if (!expVector)
    {
      for(k = 1; k < counter; k++)
        computeVectorGTRCAT(lVector, &scale, ki, i, ti[k].qz[branchReference], ti[k].rz[branchReference], &ti[k],
    			EIGN, EI, EV,
    			tipVector, yVector, mxtips);
    }
  else
    {
      for(k = 1; k < counter; k++)
	{
	  double
	    *ev1 = &expVector[k*6],
	    *ev2 = &expVector[k*6+3];

	  computeVectorGTRCAT_FAST(lVector, &scale, ki, i, &ti[k], EI, EV,
			tipVector, yVector, mxtips, ev1, ev2);
	}
    }

  x2 = &lVector[4 * (trav->qNumber - mxtips)]; 

  assert(0 <=  (trav->qNumber - mxtips) && (trav->qNumber - mxtips) < mxtips);  
       
  if(qz < zmin) 
    lz = zmin;
  lz  = log(qz); 
  lz *= ki;  
  
  d[0] = EXP (EIGN[0] * lz);
  d[1] = EXP (EIGN[1] * lz);
  d[2] = EXP (EIGN[2] * lz);       	   
  
  term =  x1[0] * x2[0];
  term += x1[1] * x2[1] * d[0];
  term += x1[2] * x2[2] * d[1];
  term += x1[3] * x2[3] * d[2];     

  term = LOG(FABS(term)) + (scale * LOG(minlikelihood));   

  term = term * w;

  rax_free(lVector);  

  return  term;
}
Exemple #28
0
void computeBOOTRAPID (tree *tr, analdef *adef, int64_t *radiusSeed) 
{ 
  int i, bestTrav, impr;                
  double lh, previousLh, difference, epsilon;              
  bestlist *bestT, *bt;  
  int countIT;
  
  bestT = (bestlist *) rax_malloc(sizeof(bestlist));
  bestT->ninit = 0;
  initBestTree(bestT, 1, tr->mxtips);
  saveBestTree(bestT, tr);

  bt = (bestlist *) rax_malloc(sizeof(bestlist));      
  bt->ninit = 0;  
  initBestTree(bt, 5, tr->mxtips);

  initInfoList(10);
 
  difference = 10.0;
  epsilon = 0.01;         

  bestTrav = adef->bestTrav = 5 + 11 * randum(radiusSeed);
   
  Thorough = 1;  
 
  impr = 1;

  if(tr->doCutoff)
    tr->itCount = 0;

  tr->bigCutoff = TRUE;
  
  for(countIT = 0; countIT < 2 && impr; countIT++) 
    {              
      recallBestTree(bestT, 1, tr);       
      treeEvaluate(tr, 1);	 	                    
      saveBestTree(bestT, tr); 
           
      lh = previousLh = tr->likelihood;
         
      treeOptimizeRapid(tr, 1, bestTrav, adef, bt);       

      impr = 0;
	  
      for(i = 1; i <= bt->nvalid; i++)
	{	    		  	   
	  recallBestTree(bt, i, tr);	    	  	  
	 

	  treeEvaluate(tr, 0.25);	  	  
	      
	  difference = ((tr->likelihood > previousLh)? 
			tr->likelihood - previousLh: 
			previousLh - tr->likelihood); 	    
	  if(tr->likelihood > lh && difference > epsilon)
	    {
	      impr = 1;	       
	      lh = tr->likelihood;	       	     
	      saveBestTree(bestT, tr);
	    }	   	   
	}

    } 
  
  tr->bigCutoff = FALSE;

  recallBestTree(bestT, 1, tr);   
  freeBestTree(bestT);
  rax_free(bestT);
  freeBestTree(bt);
  rax_free(bt);
  freeInfoList(); 
}
Exemple #29
0
char *Tree2String(char *treestr, tree *tr, nodeptr p, boolean printBranchLengths, boolean printNames, boolean printLikelihood, 
		  boolean rellTree, 
		  boolean finalPrint, analdef *adef, int perGene, boolean branchLabelSupport, boolean printSHSupport, boolean printIC, boolean printSHSupports)
{ 

  if(rellTree)
    assert(!branchLabelSupport && !printSHSupport);

  if(branchLabelSupport)
    assert(!rellTree && !printSHSupport);

  if(printSHSupport)
    assert(!branchLabelSupport && !rellTree);

  if(finalPrint && adef->outgroup)
    {
      nodeptr startNode = tr->start;

      if(tr->numberOfOutgroups > 1)
	{
	  nodeptr root;
	  nodeptr *subtrees = (nodeptr *)rax_malloc(sizeof(nodeptr) * tr->mxtips);
	  int i, k, count = 0;
	  int *nodeNumbers = (int*)rax_malloc(sizeof(int) * tr->numberOfOutgroups);
	  int *foundVector = (int*)rax_malloc(sizeof(int) * tr->numberOfOutgroups);
	  boolean monophyletic = FALSE;

	  collectSubtrees(tr, subtrees, &count, tr->numberOfOutgroups);

	  /*printf("Found %d subtrees of size  %d\n", count, tr->numberOfOutgroups);*/
	  
	  for(i = 0; (i < count) && (!monophyletic); i++)
	    {
	      int l, sum, nc = 0;
	      for(k = 0; k <  tr->numberOfOutgroups; k++)
		{
		  nodeNumbers[k] = -1;
		  foundVector[k] = 0;
		}

	      checkOM(subtrees[i], nodeNumbers, &nc, tr);	      
	      
	      for(l = 0; l < tr->numberOfOutgroups; l++)
		for(k = 0; k < tr->numberOfOutgroups; k++)
		  {
		    if(nodeNumbers[l] == tr->outgroupNums[k])
		      foundVector[l] = 1;
		  }
	      
	      sum = 0;
	      for(l = 0; l < tr->numberOfOutgroups; l++)
		sum += foundVector[l];
	      
	      if(sum == tr->numberOfOutgroups)
		{	       		  
		  root = subtrees[i];
		  tr->start = root;		
		  /*printf("outgroups are monphyletic!\n");*/
		  monophyletic = TRUE;		  
		}
	      else
		{
		  if(sum > 0)
		    {
		      /*printf("outgroups are NOT monophyletic!\n");*/
		      monophyletic = FALSE;
		    }	     
		}	
	    }
	  
	  if(!monophyletic)
	    {
	      printf("WARNING, outgroups are not monophyletic, using first outgroup \"%s\"\n", tr->nameList[tr->outgroupNums[0]]);
	      printf("from the list to root the tree!\n");
	     

	      {
		FILE *infoFile = myfopen(infoFileName, "ab");

		fprintf(infoFile, "\nWARNING, outgroups are not monophyletic, using first outgroup \"%s\"\n", tr->nameList[tr->outgroupNums[0]]);
		fprintf(infoFile, "from the list to root the tree!\n");
		
		fclose(infoFile);
	      }


	      tr->start = tr->nodep[tr->outgroupNums[0]];
	      
	      rootedTree(treestr, tr, tr->start->back, printBranchLengths, printNames, printLikelihood, rellTree, 
			 finalPrint, adef, perGene, branchLabelSupport, printSHSupport);
	    }
	  else
	    {	     
	      if(isTip(tr->start->number, tr->rdta->numsp))
		{
		  printf("Outgroup-Monophyly ERROR; tr->start is a tip \n");
		  errorExit(-1);
		}
	      if(isTip(tr->start->back->number, tr->rdta->numsp))
	      	{
		  printf("Outgroup-Monophyly ERROR; tr->start is a tip \n");
		  errorExit(-1);
		}
	      	      
	      rootedTree(treestr, tr, tr->start->back, printBranchLengths, printNames, printLikelihood, rellTree, 
			 finalPrint, adef, perGene, branchLabelSupport, printSHSupport);
	    }
	  
	  rax_free(foundVector);
	  rax_free(nodeNumbers);
	  rax_free(subtrees);
	}
      else
	{	  
	  tr->start = tr->nodep[tr->outgroupNums[0]];	 
	  rootedTree(treestr, tr, tr->start->back, printBranchLengths, printNames, printLikelihood, rellTree, 
		     finalPrint, adef, perGene, branchLabelSupport, printSHSupports);
	}      

      tr->start = startNode;
    }
  else    
    Tree2StringREC(treestr, tr, p, printBranchLengths, printNames, printLikelihood, rellTree, 
		   finalPrint, perGene, branchLabelSupport, printSHSupport, printIC, printSHSupports);  
    
  
  while (*treestr) treestr++;
  
  return treestr;
}
//Use the plausibility checker overhead
void plausibilityChecker(tree *tr, analdef *adef)
{
  FILE
  *treeFile, 
    *treeFile2,
    *rfFile;
  
  tree 
    *smallTree = (tree *)rax_malloc(sizeof(tree));

  char 
    rfFileName[1024];

  int
    numberOfTreesAnalyzed = 0,
    i;

  double 
    avgRF = 0.0,
    sumEffectivetime = 0.0;

  /* set up an output file name */

  strcpy(rfFileName,         workdir);  
  strcat(rfFileName,         "RAxML_RF-Distances.");
  strcat(rfFileName,         run_id);

  rfFile = myfopen(rfFileName, "wb");  

  assert(adef->mode ==  PLAUSIBILITY_CHECKER);

  /* open the big reference tree file and parse it */

  treeFile = myfopen(tree_file, "r");

  printBothOpen("Parsing reference tree %s\n", tree_file);

  treeReadLen(treeFile, tr, FALSE, TRUE, TRUE, adef, TRUE, FALSE);


  assert(tr->mxtips == tr->ntips);
  
  /*************************************************************************************/
  /* Preprocessing Step */

  double 
    preprocesstime = gettime();
  
  /* taxonToLabel[2*tr->mxtips - 2]; 
  Array storing all 2n-2 labels from the preordertraversal: (Taxonnumber - 1) -> (Preorderlabel) */
  int 
    *taxonToLabel  = (int *)rax_malloc((2*tr->mxtips - 2) * sizeof(int)),

    /* taxonHasDeg[2*tr->mxtips - 2] 
    Array used to store the degree of every taxon, is needed to extract Bipartitions from multifurcating trees 
    (Taxonnumber - 1) -> (degree of node(Taxonnumber)) */

    *taxonHasDeg = (int *)rax_calloc((2*tr->mxtips - 2),sizeof(int)),

    /* taxonToReduction[2*tr->mxtips - 2]; 
  Array used for reducing bitvector and speeding up extraction: 

  (Taxonnumber - 1) -> Index in smallTreeTaxa (starting from 0)
  which is also:
  (Taxonnumber - 1) -> (0..1 (increment count of taxa appearing in small tree))
  (Taxonnumber - 1) -> (0..1 (increment count of inner nodes appearing in small tree)) */

    *taxonToReduction = (int *)rax_malloc((2*tr->mxtips - 2) * sizeof(int));
    
  int 
    newcount = 0; //counter used for correct traversals

  /* labelToTaxon[2*tr->mxtips - 2];
  is used to translate between Perorderlabel and p->number: (Preorderlabel) -> (Taxonnumber) */
  int 
    *labelToTaxon = (int *)rax_malloc((2*tr->mxtips - 2) * sizeof(int));
  
  /* Preorder-Traversal of the large tree */
  preOrderTraversal(tr->start->back,tr->mxtips, tr->start->number, taxonToLabel, labelToTaxon, &newcount);

  newcount = 0; //counter set to 0 to be now used for Eulertraversal

  /* eulerIndexToLabel[4*tr->mxtips - 5]; 
  Array storing all 4n-5 PreOrderlabels created during eulertour: (Eulerindex) -> (Preorderlabel) */
  int* 
    eulerIndexToLabel = (int *)rax_malloc((4*tr->mxtips - 5) * sizeof(int));

  /* taxonToEulerIndex[tr->mxtips]; 
  Stores all indices of the first appearance of a taxa in the eulerTour: (Taxonnumber - 1) -> (Index of the Eulertour where Taxonnumber first appears) 
  is used for efficient computation of the Lowest Common Ancestor during Reconstruction Step
  */
  int*
    taxonToEulerIndex  = (int *)rax_malloc((tr->mxtips) * sizeof(int));

  /* Init taxonToEulerIndex and taxonToReduction */
  int 
    ix;

  for(ix = 0; ix < tr->mxtips; ++ix)    
    taxonToEulerIndex[ix] = -1;    
  
  for(ix = 0; ix < (2*tr->mxtips - 2); ++ix)    
    taxonToReduction[ix] = -1;    


  /* Eulertraversal of the large tree*/
  unrootedEulerTour(tr->start->back,tr->mxtips, eulerIndexToLabel, taxonToLabel, &newcount, taxonToEulerIndex);

  /* Creating RMQ Datastructure for efficient retrieval of LCAs, using Johannes Fischers Library rewritten in C
  Following Files: rmq.h,rmqs.c,rmqs.h are included in Makefile.RMQ.gcc */
  RMQ_succinct(eulerIndexToLabel,4*tr->mxtips - 5);

  double 
    preprocessendtime = gettime() - preprocesstime;

  /* Proprocessing Step End */
  /*************************************************************************************/

  printBothOpen("The reference tree has %d tips\n", tr->ntips);

  fclose(treeFile);
  
  /***********************************************************************************/
  /* RF-OPT Preprocessing Step */
  /***********************************************************************************/

  /* now see how many small trees we have */
  treeFile = getNumberOfTrees(tr, bootStrapFile, adef);
  treeFile2 = getNumberOfTrees(tr, bootStrapFile, adef);

  checkTreeNumber(tr->numberOfTrees, bootStrapFile);

  /* allocate a data structure for parsing the potentially mult-furcating tree */

  allocateMultifurcations(tr, smallTree);

  /* Start Additional preprocessing step */

  int 
    numberOfBips = 0,
    numberOfSets = 0;

  //Stores the number of bips of each tree
  int *bipsPerTree = (int *)rax_malloc(tr->numberOfTrees * sizeof(int));

  //Stores the number of taxa for each tree
  int *taxaPerTree = (int *)rax_malloc(tr->numberOfTrees * sizeof(int));

  //To calculate all bipartitions, I created a new treeFile2 and a new getNumberOfTrees method!!
  for(i = 0; i < tr->numberOfTrees; i++) {

    int this_treeBips = readMultifurcatingTree(treeFile2, smallTree, adef, TRUE);
  
    numberOfBips = numberOfBips + this_treeBips;
  
    numberOfSets = numberOfSets + this_treeBips * this_treeBips;

    bipsPerTree[i] = this_treeBips;
  }

  printf("numberOfBips: %i , numberOfSets: %i \n \n", numberOfBips, numberOfSets);  

  //stores induced bips (OLD?)
  unsigned int *ind_bips = (unsigned int *)rax_malloc(numberOfBips * sizeof(unsigned int));

  //stores smalltree bips (OLD?)
  unsigned int *s_bips = (unsigned int *)rax_malloc(numberOfBips * sizeof(unsigned int));

  //stores small bips per tree
  unsigned int ***sBipsPerTree = (unsigned int ***)rax_malloc(tr->numberOfTrees * sizeof(unsigned int**));

  //stores induced bips per tree
  unsigned int ***indBipsPerTree = (unsigned int ***)rax_malloc(tr->numberOfTrees * sizeof(unsigned int**));

  //stores vLength of each tree for processing bitVectors
  unsigned int *vectorLengthPerTree = (unsigned int *)rax_malloc(tr->numberOfTrees * sizeof(unsigned int*));

  //stores the corresponding tree number for each bip
  int *treenumberOfBip = (int *)rax_malloc(numberOfBips * sizeof(int));

  //Stores all dropsets of all trees 
  int **sets = (int **)rax_malloc(numberOfSets * sizeof(int*)); 
  //int **sets = NULL;

  //For each tree, stores a translation array from taxanumber smalltree->largetree
  int **smallTreeTaxaList = (int **)rax_malloc(tr->numberOfTrees * sizeof(int*)); 

  //For each tree, store a translation array from taxanumber largetree->smalltree
  int **taxonToReductionList = (int **)rax_malloc(tr->numberOfTrees * sizeof(int*));

  //I use these variables as global variables for all trees to determine the max number of possible sets to generate a static array
  int currentBips = 0;
  int currentSmallBips = 0;
  int currentSets = 0;

  //int currentTree = 0; already there in number of trees analyzed
  
  //Prefill sets with -1s
  for(int it = 0;it < (numberOfSets);it++){
  int fill[1] = {-1};
  sets[it] = fill; 
  }
  
  /***********************************************************************************/
  /* RF-OPT Preprocessing Step End */
  /***********************************************************************************/

  /* loop over all small trees */

  for(i = 0; i < tr->numberOfTrees;  i++)
    {      
      int
    numberOfSplits = readMultifurcatingTree(treeFile, smallTree, adef, TRUE);
      
      if(numberOfSplits > 0)
  {
    int
      firstTaxon;           

    double
      rec_rf,
      maxRF;

    if(numberOfTreesAnalyzed % 100 == 0)
      printBothOpen("Small tree %d has %d tips and %d bipartitions\n", i, smallTree->ntips, numberOfSplits);    
    
    /* compute the maximum RF distance for computing the relative RF distance later-on */
    
    /* note that here we need to pay attention, since the RF distance is not normalized 
       by 2 * (n-3) but we need to account for the fact that the multifurcating small tree 
       will potentially contain less bipartitions. 
       Hence the normalization factor is obtained as n-3 + numberOfSplits, where n-3 is the number 
       of bipartitions of the pruned down large reference tree for which we know that it is 
       bifurcating/strictly binary */
    
    maxRF = (double)(2 * numberOfSplits);
    
    /* now get the index of the first taxon of the small tree.
       we will use this to unambiguously store the bipartitions 
    */
    
    firstTaxon = smallTree->start->number;

    //Saves the number of taxa in the tree (for RF-OPT)
    taxaPerTree[numberOfTreesAnalyzed] = smallTree->ntips; 
    
    /***********************************************************************************/
    /* Reconstruction Step */
    
    double 
      time_start = gettime();
    
    /* Init hashtable to store Bipartitions of the induced subtree T|t_i */
    /* 
       using smallTree->ntips instead of smallTree->mxtips yields faster code 
       e.g. 120 versus 128 seconds for 20,000 small trees on my laptop 
     */
    hashtable
      *s_hash = initHashTable(smallTree->ntips * 4);


    /* Init hashtable to store Bipartitions of the reference tree t_i*/
    hashtable
      *ind_hash = initHashTable(smallTree->ntips * 4);
    
    /* smallTreeTaxa[smallTree->ntips]; 
       Stores all taxa numbers from smallTree into an array called smallTreeTaxa: (Index) -> (Taxonnumber)  */
    int* 
      smallTreeTaxa = (int *)rax_malloc((smallTree->ntips) * sizeof(int));
    
    /* counter is set to 0 for correctly extracting taxa of the small tree */
    newcount = 0; 
    
    int 
      newcount2 = 0;
    
    /* seq2[2*smallTree->ntips - 2]; 
       stores PreorderSequence of the reference smalltree: (Preorderindex) -> (Taxonnumber) */
    int* 
      seq2 = (int *)rax_malloc((2*smallTree->ntips - 2) * sizeof(int));
    
    /* used to store the vectorLength of the bitvector */
    unsigned int 
      vectorLength;
    
    /* extract all taxa of the smalltree and store it into an array, 
       also store all counts of taxa and nontaxa in taxonToReduction */
    rec_extractTaxa(smallTreeTaxa, taxonToReduction, smallTree->start, smallTree->mxtips, &newcount, &newcount2);
    
    rec_extractTaxa(smallTreeTaxa, taxonToReduction, smallTree->start->back, smallTree->mxtips, &newcount, &newcount2);
    
    /* counter is set to 0 to correctly preorder traverse the small tree */
    newcount = 0;
    
    /* Preordertraversal of the small reference tree and save its sequence into seq2 for later extracting the bipartitions, it
       also stores information about the degree of every node */
    
    rec_preOrderTraversalMulti(smallTree->start->back,smallTree->mxtips, smallTree->start->number, seq2, taxonHasDeg, &newcount);
    
    /* calculate the bitvector length */
    if(smallTree->ntips % MASK_LENGTH == 0)
      vectorLength = smallTree->ntips / MASK_LENGTH;
    else
      vectorLength = 1 + (smallTree->ntips / MASK_LENGTH); 


    /***********************************************************************************/
    /* RF-OPT Additional Preprocessing storing Bipartitions */
    /***********************************************************************************/    

    vectorLengthPerTree[numberOfTreesAnalyzed] = vectorLength;
    
    unsigned int 
      **bitVectors = rec_initBitVector(smallTree, vectorLength);

    unsigned int
      **sBips;

    /* store all non trivial bitvectors using an subtree approach for the reference subtree and 
       store it into a hashtable, this method was changed for multifurcation */
    sBips = RFOPT_extractBipartitionsMulti(bitVectors, seq2, newcount,tr->mxtips, vectorLength, smallTree->ntips, 
               firstTaxon, s_hash, taxonToReduction, taxonHasDeg, numberOfSplits);

    sBipsPerTree[numberOfTreesAnalyzed] = sBips;

    /***********************************************************************************/
    /* End RF-OPT Additional Preprocessing storing Bipartitions */
    /***********************************************************************************/  
    
    /* counter is set to 0 to be used for correctly storing all EulerIndices */
    newcount = 0; 
    
    /* smallTreeTaxonToEulerIndex[smallTree->ntips]; 
       Saves all first Euler indices for all Taxons appearing in small Tree: 
       (Index) -> (Index of the Eulertour where the taxonnumber of the small tree first appears) */
    int* 
      smallTreeTaxonToEulerIndex = (int *)rax_malloc((smallTree->ntips) * sizeof(int));
    
    /* seq[(smallTree->ntips*2) - 1] 
       Stores the Preordersequence of the induced small tree */
    int* 
      seq = (int *)rax_malloc((2*smallTree->ntips - 1) * sizeof(int));
    
    
    /* iterate through all small tree taxa */
    for(ix = 0; ix < smallTree->ntips; ix++) 
      {        
        int 
          taxanumber = smallTreeTaxa[ix];
        
        /* To create smallTreeTaxonToEulerIndex we filter taxonToEulerIndex for taxa in the small tree*/
        smallTreeTaxonToEulerIndex[newcount] = taxonToEulerIndex[taxanumber-1]; 
        
        /* Saves all Preorderlabel of the smalltree taxa in seq*/
        seq[newcount] = taxonToLabel[taxanumber-1];
        
        newcount++;
      }
    
    /* sort the euler indices to correctly calculate LCA */
    //quicksort(smallTreeTaxonToEulerIndex,0,newcount - 1);             
    
    qsort(smallTreeTaxonToEulerIndex, newcount, sizeof(int), sortIntegers);
    
    //printf("newcount2 %i \n", newcount2);      
    /* Iterate through all small tree taxa */
    for(ix = 1; ix < newcount; ix++)
      {  
        /* query LCAs using RMQ Datastructure */
        seq[newcount - 1 + ix] =  eulerIndexToLabel[query(smallTreeTaxonToEulerIndex[ix - 1],smallTreeTaxonToEulerIndex[ix])];   
        
        /* Used for dynamic programming. We save an index for every inner node:
     For example the reference tree has 3 inner nodes which we saves them as 0,1,2.
     Now we calculate for example 5 LCA to construct the induced subtree, which are also inner nodes. 
     Therefore we mark them as 3,4,5,6,7  */
        
        taxonToReduction[labelToTaxon[seq[newcount - 1 + ix]] - 1] = newcount2;
        
        newcount2 += 1;
      }
    
    /* sort to construct the Preordersequence of the induced subtree */
    //quicksort(seq,0,(2*smallTree->ntips - 2));
    
    qsort(seq, (2 * smallTree->ntips - 2) + 1, sizeof(int), sortIntegers);
    
    /* calculates all bipartitions of the reference small tree and count how many bipartition it 
    shares with the induced small tree and stores those bipartitions in a additional hashtable called ind_hash */
    int 
      rec_bips = 0;

    unsigned int
      **indBips;

    indBips = RFOPT_findAddBipartitions(bitVectors, seq,(2*smallTree->ntips - 1), labelToTaxon, tr->mxtips, vectorLength, smallTree->ntips, firstTaxon, s_hash, ind_hash, taxonToReduction);
      
    indBipsPerTree[numberOfTreesAnalyzed] = indBips; 

    /* calculates all bipartitions of the reference small tree and put them into ind_hash*/
    // rec_extractBipartitionsMulti(bitVectors, seq2, (2*smallTree->ntips - 1),tr->mxtips, vectorLength, smallTree->ntips, 
    // firstTaxon, s_hash, taxonToReduction, taxonHasDeg, numberOfSplits);


    /* Reconstruction Step End */
    /***********************************************************************************/
    
    double 
      effectivetime = gettime() - time_start;
    
    /*
      if(numberOfTreesAnalyzed % 100 == 0)
      printBothOpen("Reconstruction time: %.10f secs\n\n", effectivetime);
    */
    
    /* compute the relative RF */


    /***********************************************************************************/
    /* RF-OPT Save Translation Vectors */
    /***********************************************************************************/
      
    //copy array taxonToReduction because it is originally defined in preprocessing step
    int * taxonToReductionCopy = (int *)rax_malloc((tr->mxtips)*sizeof(int));

    memcpy(taxonToReductionCopy,taxonToReduction,(tr->mxtips)*sizeof(int));

    //storing smallTree and taxonToReduction Arrays for further usage
    smallTreeTaxaList[numberOfTreesAnalyzed] = smallTreeTaxa;

    taxonToReductionList[numberOfTreesAnalyzed] = taxonToReductionCopy;   

    int this_currentSmallBips = 0; //Variable resets everytime for each tree analyzed
    
    
    /***********************************************************************************/
    /* End RF-OPT Save Translation Vectors */
    /***********************************************************************************/
  

    rec_rf = (double)(2 * (numberOfSplits - rec_bips)) / maxRF;
    
    assert(numberOfSplits >= rec_bips);      

    avgRF += rec_rf;
    sumEffectivetime += effectivetime;
    
    //if(numberOfTreesAnalyzed % 100 == 0)
    printBothOpen("Relative RF tree %d: %f\n\n", i, rec_rf);
    
    fprintf(rfFile, "%d %f\n", i, rec_rf);
    
    //rax_free(smallTreeTaxa); //Need it for calculating the SmallTreeTaxaList after all iterations!
    rax_free(seq);
    rax_free(seq2);
    rax_free(smallTreeTaxonToEulerIndex);

    numberOfTreesAnalyzed++; //Counting the number of trees analyzed
    }

  }// End of Small Tree Iterations

  /***********************************************************************************/
  /* RF-OPT DropSet Calculation using BitVectors */
  /***********************************************************************************/

  
  log_info("===> Create DropSet Datastructure \n");

  static Hashmap* map = NULL;
  //Set a hashmap for dropsets with a dropset comparision and standard hash
  map = Hashmap_create(compareDropSet, NULL);

  static Hashmap** mapArray = NULL;
  //Set an array to store the pointers to bitvector hashtables for each tree 
  mapArray = rax_malloc(tr->numberOfTrees * sizeof(Hashmap*));


  printf("===> BitVector Set Calculation \n");

  //Calculate dropsets of two given bips lists and extract all sets into array sets and into a hashmap. Each set has following format
  //dropset = {taxa_1,taxa_2,...,taxa_n,-1};
  //Furtheremore calculate Dropset generates two data structures from type bips and dropsets which are pointing to each other in hashtables
  calculateDropSets(mapArray, map, indBipsPerTree, sBipsPerTree, sets, smallTreeTaxaList, bipsPerTree, 
  taxaPerTree, vectorLengthPerTree, tr->numberOfTrees);

  /***********************************************************************************/
  /* RF-OPT Graph Construction */
  /***********************************************************************************/

  // printf("\n == Sets == \n");
  // for(int fooo = 0; fooo < numberOfSets; fooo++){
  //   printf("Set %i: ", fooo);
  //   int i = 0;
  //   while(sets[fooo][i] > -1) {
  //    printf("%i ",sets[fooo][i]);
  //    i++;
  //   }
  //   printf("\n");
  // }
  // printf("\n");
  /*
    Filter for unique sets
  */
  log_info("===> Hashmap tests...\n");
  
  Hashmap_traverse(map, traverse_cb);

  // int key[2] = {0,-1};

  // Dropset* drop = Hashmap_get(map,key);
  // DArray* bips = drop->bipartitions;

  // for(int i = 0; i < DArray_count(bips); i++) {
  //   Bipartition* bip = DArray_get(bips,i);
  //   printBitVector(bip->bitvector[0]);
  //   printf("matching: %i \n", bip->matching);
  //   printf("tree: %i \n", bip->treenumber);
  // }

  // Bipartition* bipFromHash = DArray_first(bips);
  // Bipartition* testBip = Hashmap_get(mapArray[0],bipFromHash->bitvector);
  // printf("matching before: %i",testBip->matching);
  // testBip->matching = 999;

  // for(int i = 0; i < DArray_count(bips); i++) {
  //   Bipartition* bip = DArray_get(bips,i);
  //   printBitVector(bip->bitvector[0]);
  //   printf("matching: %i \n", bip->matching);
  //   printf("tree: %i \n", bip->treenumber);
  // }


  printf("===> Filter for unique sets (naive)...\n");

  /* unique sets array data structures */
  int** uniqSets = (int **) rax_malloc(sizeof(int*) * numberOfSets);
  int* setsToUniqSets = (int*) rax_malloc(sizeof(int) * numberOfSets);
  int numberOfUniqueSets = 0;
  int dropSetCount = 0;



  //stores the scores for each bips, we are using a bitvector approach (need to scale)
    
  //Legacy Code 
  int bvec_scores = 0;
  
  numberOfUniqueSets = getUniqueDropSets(sets, uniqSets, setsToUniqSets, numberOfSets);

  printf("number of unique sets: %i \n", numberOfUniqueSets);

  /*
    Detect initial matchings, we calculate them using bitvectors to represent our bipartitions
  */
  printf("===> Detect initial matchings...\n");
  int vLengthBip = 0;

  //determine the bitVector Length of our bitVector
  if(numberOfBips % MASK_LENGTH == 0)
    vLengthBip = numberOfBips / MASK_LENGTH; 
  else 
    vLengthBip = numberOfBips / MASK_LENGTH + 1;

  //Initialize a bvecScore vector with 0s
  int* bvecScores = (int*)rax_calloc(vLengthBip,sizeof(int));

  //Calculate Initial Matchings and save the result in bvecScores
  detectInitialMatchings(sets, bvecScores, bipsPerTree, numberOfTreesAnalyzed, vLengthBip); 

  //Short summary until now:
  // - bipsPerTree consists of all bipartitions per tree
  // - bvecScores is the bitvector setting 1 to all bipartition indices which can score 
  // - taxaPerTree number of taxa per tree
  // - smallTreeTaxaList list of all smalltree->largetree translation arrays

  /*
    Generate useful data structures for calculating and updating scores
  */
  printf("===> Create data structures...\n");  
  //Stores the number of bips per Set and initialize it with 0s
  int* numberOfBipsPerSet = (int*)rax_calloc(numberOfUniqueSets,sizeof(int));

  //Stores all sets which includes this taxa
  int **setsOfTaxa = (int**)rax_malloc((tr->mxtips + 1) *sizeof(int*));
  
  //Now calculate number of bipartitions affected by each unique set
  for(int i = 0; i < numberOfSets; i++) {

    int setindex = setsToUniqSets[i];

    numberOfBipsPerSet[setindex]++;
  }

  //Now using the knowledge of how many bips there are per set, generate an array for each unique dropset containing all bips
  int** bipsOfDropSet = (int**)rax_malloc(sizeof(int*)*numberOfUniqueSets);
  
  //Allocate the space needed for storing all bips
  for(int i = 0; i < numberOfUniqueSets; i++) {

    bipsOfDropSet[i] = (int*)rax_malloc(sizeof(int)*numberOfBipsPerSet[i]); 
  }
  
  printf("==> Initialize the Bips Of Taxa \n");
  //Stores the number of bips each taxa is included (ABC|DE is stored by A,B,C,D and E)
  //It can be calculated by iterating through all trees and adding the taxa 
  int **bipsOfTaxa = (int**)rax_malloc((tr->mxtips + 1) * sizeof(int*));
  int *numberOfBipsPerTaxa = (int*)rax_calloc((tr->mxtips + 1), sizeof(int));
  int *taxaBipsCounter = (int*)rax_calloc((tr->mxtips + 1), sizeof(int));

  //Now add up all
  for (int tree = 0; tree < tr->numberOfTrees; tree++) {

    int* list = smallTreeTaxaList[tree];

    for (int j = 0; j < taxaPerTree[tree]; j++) {

      int taxa = list[j];

      numberOfBipsPerTaxa[taxa] = numberOfBipsPerTaxa[taxa] + bipsPerTree[tree];
    } 
  }

  //Now create dummy arrays inside bipsOfTaxa
  for(int i = 1; i < tr->mxtips+1; i++) {
    bipsOfTaxa[i] = (int*)rax_malloc(sizeof(int)*numberOfBipsPerTaxa[i]);
  }

  printf("==> Storing all bip indices of a certain dropset into an array \n");
  //For checking if all dropsets are iterated
  dropSetCount = 0;
  //Arrays of counter to keep in track
  int* counterOfSet = (int*)rax_malloc(sizeof(int)*numberOfUniqueSets);
  for(int i = 0; i < numberOfUniqueSets; i++) {
    counterOfSet[i] = 0;
  }

  currentBips = 0; //Need to keep in track of the number of bips
  //First iterate through all trees 
  for(int i = 0; i < numberOfTreesAnalyzed; i++ ) {

    //get the correct smallTreeTaxa List
    int* list = smallTreeTaxaList[i];

    //For each bipartition in the tree
    for(int j = 0; j < bipsPerTree[i]; j++) {

      //Look at all bips it is compared too
      int dropSetsPerBip = bipsPerTree[i];

      for(int k = 0; k < dropSetsPerBip; k++){

        int indexOfUniqDropSet = setsToUniqSets[dropSetCount + k];

        int* bips_array = bipsOfDropSet[indexOfUniqDropSet]; 

        //add bipartition j into the bips array of its dropset
        bips_array[counterOfSet[indexOfUniqDropSet]] = currentBips; 

        //increment the internal array index 
        counterOfSet[indexOfUniqDropSet]++;
      }
    //Jump to the next correct dropSetCount!
    dropSetCount = dropSetCount + dropSetsPerBip;

    //now insert the bip into bipsOfTaxa Array
    for(int ix = 0; ix < taxaPerTree[i]; ix++) {

      //get the taxa number
      int stree_Taxa = list[ix];

      //get the bips list of this taxa number
      int* bipsList = bipsOfTaxa[stree_Taxa];

      //now get the position of the biplist and put in our bip index
      bipsList[taxaBipsCounter[stree_Taxa]] = currentBips;

      //increment the counter 
      taxaBipsCounter[stree_Taxa]++;

    }

    //increment currentBips
    currentBips++; 
    }

  }

  /***********************************************************************************/
  /* End RF-OPT Graph Construction */
  /***********************************************************************************/

  /* Short summary :
    sets - array of all dropsets
    uniqSets - array of all unique dropsets
    bipsPerTree - bips per tree
    setsToUniqSets - translates the index of sets to the index of its unique dropset index
    bipsOfDropSets - all bips which disappear when dropset i is pruned
    scores - has all scores between 0 and 1 for the bips (however 0s can be found out by looking at all dropsets with link to dropset 0 (because we sort and it will always be the lowest))  
  */


  /***********************************************************************************/
  /* RF-OPT Initial Score Calculation */
  /***********************************************************************************/


  unsigned int bipsVectorLength;

  /* calculate the bitvector length for bips bitvector */
  if(numberOfBips % MASK_LENGTH == 0)
    bipsVectorLength = numberOfBips / MASK_LENGTH;
  else
    bipsVectorLength = 1 + (numberOfBips / MASK_LENGTH); 

  //Starting from index 1 (because 0 stands for all who already matches)
  //We need a score array saving the scores for each uniqset
  int* rf_score = (int*)rax_calloc(numberOfUniqueSets,sizeof(int));

  printf("==> Calculating the score for the first iteration \n \n");

  //Store all bvecs of all merged and destroyed bipartitions per DropSet 
  int* bvecs_bips = (int*)rax_malloc(sizeof(int)*numberOfUniqueSets);
  int* bvecs_destroyed = (int*)rax_malloc(sizeof(int)*numberOfUniqueSets);



  //Iterate through all sets
  for(int i = 0; i < numberOfUniqueSets; i++) {

    //Bitvectors of merged and destroyed
    int bvec_destroyed = 0;

    int* set = uniqSets[i]; //Get the dropset, first dropset is 0 (if something is matching)

    //printf(" ==> Analyze Unique DropSet %i \n", i);

    //We use this data structure to keep track of the to toggled bits
    int* toggleBits = (int*)rax_calloc(numberOfBips, sizeof(int));

    //Now iterate through the set
    int j = 0;

    //Stores the affected bips into a bitvector
    int bvec_bips = 0;

    while(set[j] != -1) {

      int taxa = set[j]; //Get the taxa
      //printf("  Taxa number is %i \n",taxa);

      //Check if set[j] is itself already a set
      int test[2] = {taxa,-1}; 

      //0 if it is not a set, index + 1 otherwise
      int test_index = contains(test, uniqSets, numberOfUniqueSets);

      if(test_index){
        //printf("  It also is in uniqSet %i \n", test_index - 1);
        bvec_bips = getBipsOfDropSet(bvec_bips, (test_index - 1), numberOfBipsPerSet, bipsOfDropSet);

      }

      //Get all bips of this taxa to detect which one will be destroyed
      int* listOfBips = bipsOfTaxa[taxa]; 

      //Go through all bipartitions containing this taxa
      for(int k = 0; k < numberOfBipsPerTaxa[taxa]; k++){

        int bipindex = listOfBips[k]; //Get the index of the Bipartition

        int bip = ind_bips[bipindex];

        //Now analyze this Bipartition

        //Which tree does this bipartition belongs too?
        int treenumber = treenumberOfBip[bipindex];

        //Get the taxonToSmallTree Array of this tree
        int* stTaxa = taxonToReductionList[treenumber];

        //Translate the global taxon number it into the local index used by our bips
        int translated_index = stTaxa[taxa - 1]; //We use taxa - 1 because we start counting at taxa 1 = 0 !

        //Save the to toggle index into toggleBits vector
        toggleBits[bipindex] |= 1 << translated_index;

        //Sort for bits set on one side of the bip and on the other side
        int leftBits = __builtin_popcount(toggleBits[bipindex] & bip);
        int rightBits = __builtin_popcount(toggleBits[bipindex]) - leftBits;

        //Check for the number of bits set in the original bip 
        int leftBip = __builtin_popcount(bip);
        int rightBip = taxaPerTree[treenumber] - leftBip;

        //Subtract the total number of bits set on one side of the bip with the bits we have to toggle
        int leftBip_after = leftBip - leftBits;
        int rightBip_after = rightBip - rightBits;

        //Check if bipartition gets trivial/destroyed due to pruning the taxa and set the bit (representing the bip) which is destroyed
        if((leftBip_after <= 1) | (rightBip_after <=1)) {

        //Add bips to the bits which represent destroyed bipartitions
        bvec_destroyed = setBit(bvec_destroyed,bipindex);

        }
      
      } 

      j++;

    }//End iterate through the set


    int penality = 0;
    int score = 0;

    int bvec_mask = 0;
    bvec_mask = setOffSet(bvec_mask, numberOfBips);

    //Bitvector of already matching bips
    int bvec_tmp = 0;
    bvec_tmp = ~bvec_scores & bvec_mask;

    //Penality score are all bitvectors who were matching but is destroyed 
    penality = __builtin_popcount(bvec_destroyed & bvec_tmp);

    //Now iterate through bipsOfDropSet list and extract all bips which will merge into a bitVector
    bvec_bips = getBipsOfDropSet(bvec_bips, i, numberOfBipsPerSet, bipsOfDropSet);

    //Calculate the bitvectors which remains
    bvec_tmp = ~bvec_destroyed & bvec_mask;

    bvec_tmp = bvec_bips & bvec_tmp;

    score = __builtin_popcount(bvec_scores & bvec_tmp);

    rf_score[i] = score - penality;

    //Save our results for convenience into an array
    bvecs_bips[i] = bvec_bips;
    bvecs_destroyed[i] = bvec_destroyed;

  }//End Score Calculation


  printf("======> Scores:\n");
  for(int i = 0; i < numberOfUniqueSets; i++) {
    printf("RF Score for %i : %i \n", i, rf_score[i]);
    //printBitVector(bvecs_bips[i]);
    //printBitVector(bvecs_destroyed[i]);
  }

  int maxDropSet = getMax(rf_score, numberOfUniqueSets);
  printf("Max Element is %i \n", maxDropSet);




  /***********************************************************************************/
  /* RF-OPT Create Update Data Structures */
  /***********************************************************************************/


  printf("====> Delete DropSet from all bips and update numbers \n");

  //Create a bitVector to store all deleted taxa
  int bvec_deletedTaxa = 0;

  //Create a bitVector to store all still existing bips
  int bvec_existingBips = 0;

  //Create a bitvector to store deleted dropsets
  int bvec_deletedDropSets = 0;

  //Get the dropset
  int* deleteDropSet = uniqSets[maxDropSet];

  //Store it into a BitVector
  bvec_deletedDropSets = setBit(bvec_deletedDropSets,maxDropSet);

  //Select all bips destroyed by removing this dropset
  int bvec_destroyedBips = bvecs_destroyed[maxDropSet];

  //Select all bips that are now matching
  int bvec_matchingBips = bvecs_bips[maxDropSet];

  //Filter for existent bipartitions
  bvec_existingBips = getExistingBips(bvec_existingBips, numberOfBips, bvec_destroyedBips);

  //Iterate through its taxa
  int iterSet = 0;
  while(deleteDropSet[iterSet] != -1) {

    //Get taxon
    int taxon = deleteDropSet[iterSet];

    //Store the taxon into deletedTaxa BitVector
    bvec_deletedTaxa = setBit(bvec_deletedTaxa, taxon - 1);

    //Check if taxon is inside
    int test[2] = {taxon, -1};

    int index = contains(test, uniqSets, numberOfUniqueSets);

    iterSet++;
  }

  //printBitVector(bvec_existingBips);
  //printBitVector(bvec_deletedTaxa);

  //Update the scores with now matching bips
  bvec_scores = bvec_scores & (~bvec_matchingBips);

  //printBitVector(bvec_scores);

  /* Short summary :
    bvec_existingBips - bitVector of all still existing bips
    bvec_deletedTaxa - bitVector to keep track of destroyed taxa
  */

  /***********************************************************************************/
  /* TODO RF-OPT Update function */
  /***********************************************************************************/

  
  /***********************************************************************************/
  /* End RF-OPT Update function */
  /***********************************************************************************/


  //printf("Ind Bipartitions?: ");


  // printf("Induced Bipartitions: ");

  // printBitVector(ind_bips[0]);
  // printBitVector(ind_bips[1]);
  // printBitVector(ind_bips[2]);
  // printBitVector(ind_bips[3]);
  // printBitVector(ind_bips[4]);
  // printBitVector(ind_bips[5]);
  // printBitVector(ind_bips[6]);


  /***********************************************************************************/
  /* Console Logs for debugging */
  /***********************************************************************************/

  //Printing if

  printf("==> Unique Sets: ");
  for(int i = 0; i < numberOfUniqueSets; i++) {
    int j = 0;
    int* set = uniqSets[i];
    while(set[j] > -1) {
      printf("%i ",set[j]);
      j++;
    }
    printf("; ");
  }
  printf("\n");

  printf("\n == Sets == \n");
  for(int fooo = 0; fooo < numberOfSets; fooo++){
    printf("Set %i: ", fooo);
    int i = 0;
    while(sets[fooo][i] > -1) {
     printf("%i ",sets[fooo][i]);
     i++;
    }
    printf("\n");
  }
  printf("\n");

      
    //#define _PRINT_
      
    #ifdef _PRINT_

    for(int i = 0; i < numberOfUniqueSets; i++) {
      printf("Bips of Set %i: ", i);
        for(int j = 0; j < numberOfBipsPerSet[i]; j++) {
          int* bips = bipsOfDropSet[i];
          printf("%i ", bips[j]);
        }
      printf("\n");
    }


    printf("Induced Bips! \n");
    // Now checking which dropset would destroy which bipartition 
    for(int i = 0 ; i < numberOfBips; i++) {
      printf("Bip %i is %i \n",i,ind_bips[i]);
    }


    printf("Taxa Names : \n");
    for(int i = 0; i < tr->mxtips + 1; i++) {
      printf("%s ",tr->nameList[i]);
    }
    printf("\n");

    printf("Small Tree Taxa Names 0 : \n");
    for(int i = 0; i < taxaPerTree[0]; i++) {
      int* list = smallTreeTaxaList[0];
      int taxa = list[i]; 
      printf("%s ",tr->nameList[taxa]);
    }
    printf("\n");

    printf("Small Tree Taxa Names 1 : \n");
    for(int i = 0; i < taxaPerTree[1]; i++) {
      int* list = smallTreeTaxaList[1];
      int taxa = list[i]; 
      printf("%s ",tr->nameList[taxa]);
    }
    printf("\n");

    printf("Small Tree Taxa Names 2 : \n");
    for(int i = 0; i < taxaPerTree[2]; i++) {
      int* list = smallTreeTaxaList[2];
      int taxa = list[i]; 
      printf("%s ",tr->nameList[taxa]);
    }
    printf("\n");

    printf("Number of DropSets extracted%i \n",dropSetCount);
    printf("Number of Bips extracted %i \n",currentBips);

    //Testing ...
    printf("Number of Sets is %i \n",numberOfSets);
    printf("Number of Unique Sets is %i \n",numberOfUniqueSets);

    printf("==> Testing bips of unique sets \n");
    for(int i = 0; i < numberOfUniqueSets; i++) {
      printf("Bips of Set %i: ", i);
        for(int j = 0; j < numberOfBipsPerSet[i]; j++) {
          int* bips = bipsOfDropSet[i];
          printf("%i ", bips[j]);
        }
      printf("\n");
    }

    printf("==> Testing bips of taxa \n");
    for(int i = 1; i < tr->mxtips + 1; i++) {
      printf("Bips of Taxa %i: ", i);
        for(int j = 0; j < numberOfBipsPerTaxa[i]; j++) {
        int* bips = bipsOfTaxa[i];
        printf("%i ", bips[j]);
        }
      printf("\n");
    }



  printf("==> Unique Sets: ");
  for(int i = 0; i < numberOfUniqueSets; i++) {
    int j = 0;
    int* set = uniqSets[i];
    while(set[j] > -1) {
      printf("%i ",set[j]);
      j++;
    }
    printf("; ");
  }
  printf("\n");

  printf("==> setsToUniqSets: ");
  for(int i = 0; i < numberOfSets; i++) {
    printf("%i ",setsToUniqSets[i]);
  }
  printf("\n");

  //=== TREE GRAPH CONSTRUCTION ENDS ===
  printf("Scores: ");
  printBitVector(bvec_scores);
  
  printf("BipsPerTree: ");
  for(int foo = 0; foo < tr->numberOfTrees; foo++) {

    printf("%i ",bipsPerTree[foo]);

  } 

  printf("\nInduced Bips: ");
  for(int foo = 0;foo < numberOfBips; foo++) {
    
    printf("%u ",ind_bips[foo]);
  
  }

  printf("\nSmall Tree Bips: ");
  for(int foo = 0;foo < numberOfBips; foo++) {
  
    printf("%u ",s_bips[foo]);

  }

  printf("\n == Sets == \n");
  for(int fooo = 0; fooo < numberOfSets; fooo++){
    printf("Set %i: ", fooo);
    int i = 0;
    while(sets[fooo][i] > -1) {
     printf("%i ",sets[fooo][i]);
     i++;
    }
    printf("\n");
  }
  printf("\n");

  #endif

  printBothOpen("Number of small trees skipped: %d\n\n", tr->numberOfTrees - numberOfTreesAnalyzed);
  
  printBothOpen("Average RF distance %f\n\n", avgRF / (double)numberOfTreesAnalyzed);
  
  printBothOpen("Large Tree: %i, Number of SmallTrees analyzed: %i \n\n", tr->mxtips, numberOfTreesAnalyzed); 
  
  printBothOpen("Total execution time: %f secs\n\n", gettime() - masterTime);
   
  printBothOpen("File containing all %d pair-wise RF distances written to file %s\n\n", numberOfTreesAnalyzed, rfFileName);

  printBothOpen("execution stats:\n\n");
  printBothOpen("Accumulated time Effective algorithm: %.5f sec \n", sumEffectivetime);
  printBothOpen("Average time for effective: %.10f sec \n",sumEffectivetime / (double)numberOfTreesAnalyzed);
  printBothOpen("Preprocessingtime: %0.5f sec \n\n", preprocessendtime);
 

  fclose(treeFile);
  fclose(rfFile);    
  
  /* free the data structure used for parsing the potentially multi-furcating tree */

  freeMultifurcations(smallTree);
  rax_free(smallTree);

  rax_free(taxonToLabel);
  rax_free(taxonToEulerIndex);
  rax_free(labelToTaxon);
  rax_free(eulerIndexToLabel);
  rax_free(taxonToReduction);
  rax_free(taxonHasDeg);
}