Ejemplo n.º 1
0
/**
 * Load one alignment column into the sum-product workspace.
 *
 * Every tree node that has an entry in `seq` gets its character stored in gappedCol
 * (characters rejected by model.isValidSymbol() are replaced by Alignment::wildcardChar)
 * and is appended to ungappedRows; all other nodes keep Alignment::gapChar.
 * Afterwards, for every node where isGap() holds, E is filled with 1 and logE set to 0
 * in each component; every other node that has no parent, or whose parent is a gap,
 * is appended to roots.
 *
 * @param seq  Maps a row (tree node) index to that row's character in this column.
 */
void SumProduct::initColumn (const map<AlignRowIndex,char>& seq) {
  ungappedRows.clear();
  gappedCol = vguard<char> (tree.nodes(), Alignment::gapChar);
  // Only written here; the sole reader is the disabled Require() check further down.
  vguard<int> ungappedKids (tree.nodes(), 0);
  roots.clear();
  for (TreeNodeIndex r = 0; r < tree.nodes(); ++r) {
    // One lookup per row: reuse the iterator rather than calling find() and then at().
    const auto entry = seq.find(r);
    if (entry != seq.end()) {
      const char c = entry->second;
      gappedCol[r] = model.isValidSymbol(c) ? c : Alignment::wildcardChar;
      ungappedRows.push_back(r);
    }
  }

  LogThisAt(7,"Column " << join(gappedCol,"") << " ungappedRows=(" << to_string_join(ungappedRows) << ")" << endl);

  for (TreeNodeIndex r = 0; r < tree.nodes(); ++r)
    if (isGap(r)) {
      // Gapped nodes send a neutral message: all-ones E with a zero log-scale.
      for (int cpt = 0; cpt < components(); ++cpt) {
        fill (E[cpt][r].begin(), E[cpt][r].end(), 1);
        logE[cpt][r] = 0;
      }
    } else {
      //      Require (isWild(r) || ungappedKids[r] == 0, "At node %u (%s), char %c: internal node sequences must be wildcards (%c)", r, tree.seqName(r).c_str(), seq.at(r), Alignment::wildcardChar);
      const TreeNodeIndex rp = tree.parentNode(r);
      // An ungapped node without an ungapped parent starts its own subtree.
      if (rp < 0 || isGap(rp))
        roots.push_back (r);
      else
        ++ungappedKids[rp];
    }
}
Ejemplo n.º 2
0
/* Per-site log-likelihood accumulation for 4-state data using SSE2 intrinsics.
 *
 * In the tipX1 != NULL branch shown below, each site i contributes
 *   wptr[i] * LOG(FABS( sum_{k=0..3} x1[k] * x2[k] * diagptable[k] ))
 * to `sum`, where
 *   x1         = &tipVector[4 * tipX1[i]]
 *   x2         = x2_gapColumn if isGap(x2_gap, i), otherwise the next 4 doubles from x2_start
 *   diagptable = &diagptable_start[4 * cptr[i]]
 *
 * NOTE(review): all vectors are read with _mm_load_pd, which requires 16-byte-aligned
 * addresses -- confirm the callers allocate them accordingly.
 */
static double evaluateGTRCAT_SAVE (int *cptr, int *wptr,
				   double *x1_start, double *x2_start, double *tipVector, 		      
				   unsigned char *tipX1, int n, double *diagptable_start,
				   double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap)
{
  double  sum = 0.0, term;       
  int     i;

  double  *diagptable, 
    *x1, 
    *x2,
    *x1_ptr = x1_start,
    *x2_ptr = x2_start;
 
  if(tipX1)
    {           
      for (i = 0; i < n; i++) 
	{	
	  /* aligned scratch buffer that receives the two partial sums from _mm_store_pd */
	  double t[2] __attribute__ ((aligned (BYTE_ALIGNMENT)));
	  __m128d x1v1, x1v2, x2v1, x2v2, dv1, dv2;

	  /* x1 is looked up in tipVector: 4 doubles per tipX1 code */
	  x1 = &(tipVector[4 * tipX1[i]]);

	  /* sites flagged in x2_gap all read x2_gapColumn; only the other sites
	     consume (and advance past) 4 doubles of x2_start */
	  if(isGap(x2_gap, i))
	    x2 = x2_gapColumn;
	  else
	    {
	      x2 = x2_ptr;
	      x2_ptr += 4;
	    }
	  
	  /* 4-entry table slice selected by cptr[i] */
	  diagptable = &diagptable_start[4 * cptr[i]];
	  	    	  
	  x1v1 =  _mm_load_pd(&x1[0]);
	  x1v2 =  _mm_load_pd(&x1[2]);
	  x2v1 =  _mm_load_pd(&x2[0]);
	  x2v2 =  _mm_load_pd(&x2[2]);
	  dv1  =  _mm_load_pd(&diagptable[0]);
	  dv2  =  _mm_load_pd(&diagptable[2]);
	  
	  /* element-wise x1 * x2 * diagptable for entries 0-1 ... */
	  x1v1 = _mm_mul_pd(x1v1, x2v1);
	  x1v1 = _mm_mul_pd(x1v1, dv1);
	  
	  /* ... and for entries 2-3 */
	  x1v2 = _mm_mul_pd(x1v2, x2v2);
	  x1v2 = _mm_mul_pd(x1v2, dv2);
	  
	  x1v1 = _mm_add_pd(x1v1, x1v2);
	  
	  _mm_store_pd(t, x1v1);
	  	  
	  /* t[0] + t[1] completes the 4-term sum */
	  term = LOG(FABS(t[0] + t[1]));
	      
	 

	  /* weight the site's log term by wptr[i] */
	  sum += wptr[i] * term;
	}	
    }               
  else
    {
      for (i = 0; i < n; i++) 
Ejemplo n.º 3
0
/**
 * Per-component, per-token log-probabilities for the state at `node`, leaving out the
 * message from one neighbour.
 *
 * Each component's vector starts from the observed character (0 at its token and -infinity
 * elsewhere, or all zeros for a wildcard), then receives logCptWeight[cpt], the log of the
 * E message (plus logE) of every child other than `exclude`, and -- unless the parent is
 * `exclude` -- the log of the node's G message (plus logG).
 *
 * @param node       Node to query; Require() rejects nodes for which isGap() holds.
 * @param exclude    Neighbour (a child or the parent) whose message is left out.
 * @param normalize  If true, subtract the log-sum-exp over all components and tokens.
 * @return           One vector of log-probabilities per component, indexed [component][token].
 */
vguard<vguard<LogProb> > SumProduct::logNodeExcludedPostProb (TreeNodeIndex node, TreeNodeIndex exclude, bool normalize) const {
  Require (!isGap(node), "Attempt to find posterior probability of sequence at gapped position");
  const bool wild = isWild(node);
  const double negInf = -numeric_limits<double>::infinity();
  const UnvalidatedAlphTok tok = wild ? -1 : model.tokenize(gappedCol[node]);

  // Template row shared by every component: the evidence at this node.
  vguard<LogProb> evidence (model.alphabetSize(), wild ? 0 : negInf);
  if (!wild)
    evidence[tok] = 0;

  vguard<vguard<LogProb> > result (model.components(), evidence);
  LogProb total = negInf;
  for (int cpt = 0; cpt < components(); ++cpt) {
    vguard<LogProb>& row = result[cpt];
    for (auto& entry: row)
      entry += logCptWeight[cpt];

    // Messages from the children, skipping the excluded one.
    for (size_t k = 0; k < tree.nChildren(node); ++k) {
      const TreeNodeIndex kid = tree.getChild(node,k);
      if (kid == exclude)
        continue;
      for (AlphTok a = 0; a < model.alphabetSize(); ++a)
        row[a] += log (E[cpt][kid][a]) + logE[cpt][kid];
    }

    // Message from the parent side, unless the parent is the excluded neighbour.
    const TreeNodeIndex up = tree.parentNode (node);
    for (AlphTok a = 0; a < model.alphabetSize(); ++a) {
      // to add a prior for orphaned nodes, the zero should be log(insProb[a]), but that complicates MCMC etc
      const double fromAbove = (up == exclude)
        ? 0
        : (log(G[cpt][node][a]) + logG[cpt][node]);
      row[a] += fromAbove;
      log_accum_exp (total, row[a]);
    }
  }

  if (normalize) {
    for (auto& row: result)
      for (auto& entry: row)
        entry -= total;
  }
  return result;
}
Ejemplo n.º 4
0
// Render an alignment row as text: one character per row position, with '-' wherever
// isGap() reports a gap at the iterator and the dereferenced element everywhere else.
// NOTE(review): TRow is not declared in this excerpt -- presumably a template parameter
// introduced just above this definition; confirm.
string aligned_string(TRow& row)
{
    typedef typename Iterator<TRow>::Type TRowIterator;

    // Start from an all-gap string and overwrite only the non-gap columns.
    string rendered(length(row), '-');

    TRowIterator cursor = begin(row);
    TRowIterator rowEnd = end(row);
    int column = 0;
    while (cursor != rowEnd) {
        if (!isGap(cursor))
            rendered[column] = *cursor;
        ++cursor;
        ++column;
    }
    return rendered;
}
Ejemplo n.º 5
0
void SumProduct::fillDown() {
  for (int cpt = 0; cpt < components(); ++cpt) {
    LogThisAt(8,"Sending root-to-tip messages, component #" << cpt << " column " << join(gappedCol,"") << endl);
    if (!columnEmpty()) {
      for (auto r: preorder) {
	if (!isGap(r)) {
	  const TreeNodeIndex rp = tree.parentNode(r);
	  if (rp < 0 || isGap(rp)) {
	    G[cpt][r] = insProb[cpt];
	    logG[cpt][r] = 0;
	  } else {
	    const vguard<TreeNodeIndex> rsibs = tree.getSiblings(r);
	    logG[cpt][r] = logG[cpt][rp];
	    for (auto rs: rsibs)
	      logG[cpt][r] += logE[cpt][rs];
	    for (AlphTok j = 0; j < model.alphabetSize(); ++j) {
	      double Gj = 0;
	      for (AlphTok i = 0; i < model.alphabetSize(); ++i) {
		double p = G[cpt][rp][i] * branchSubProb[cpt][r][i][j];
		for (auto rs: rsibs)
		  if (!isGap(rs))
		    p *= E[cpt][rs][i];
		Gj += p;
	      }
	      G[cpt][r][j] = Gj;
	    }
	  }
	}

	LogThisAt(10,"Row " << setw(3) << r << " " << cpt << " " << gappedCol[r]
		  << " logG=" << setw(9) << setprecision(3) << logG[cpt][r]
		  << " G=(" << to_string_join(G[cpt][r]," ",9,3) << ")" << endl);
      }
    }
  }
}
Ejemplo n.º 6
0
/* Returns LOG(FABS(s)) where s = sum_{l=0..19} left[l] * right[l] * diag[l].
 * The sum is formed two doubles at a time and reduced with a horizontal add.
 * All three pointers are read with _mm_load_pd, which requires 16-byte alignment. */
static inline double gtrcatProtSiteLogTerm (const double *left, const double *right, const double *diag)
{
  __m128d acc = _mm_setzero_pd();
  double total;
  int l;

  for (l = 0; l < 20; l += 2)
    {
      __m128d lv = _mm_load_pd(&left[l]);
      __m128d rv = _mm_load_pd(&right[l]);
      __m128d prod = _mm_mul_pd(lv, rv);
      __m128d dv = _mm_load_pd(&diag[l]);

      acc = _mm_add_pd(acc, _mm_mul_pd(prod, dv));
    }

  acc = _mm_hadd_pd(acc, acc);
  _mm_storel_pd(&total, acc);

  return LOG(FABS(total));
}

/* Weighted sum over n sites of the 20-state per-site log term.
 *
 * For site i the left vector is
 *   - &tipVector[20 * tipX1[i]] when tipX1 is non-NULL, otherwise
 *   - x1_gapColumn if isGap(x1_gap, i), else the next 20 doubles of x1;
 * the right vector is x2_gapColumn if isGap(x2_gap, i), else the next 20 doubles of x2;
 * and the table slice is &diagptable_start[20 * cptr[i]].
 * Each site's term is multiplied by wptr[i] before being added to the result.
 *
 * Returns the accumulated sum (0.0 when n <= 0).
 */
static double evaluateGTRCATPROT_SAVE (int *cptr, int *wptr,
				       double *x1, double *x2, double *tipVector,
				       unsigned char *tipX1, int n, double *diagptable_start, 
				       double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap)
{
  double sum = 0.0;
  double *left;
  double *right;
  double *left_ptr = x1;
  double *right_ptr = x2;
  int i;

  if(tipX1)
    {
      /* left side is a tip: look its vector up in tipVector */
      for (i = 0; i < n; i++)
	{
	  left = &(tipVector[20 * tipX1[i]]);

	  if(isGap(x2_gap, i))
	    right = x2_gapColumn;
	  else
	    {
	      right = right_ptr;
	      right_ptr += 20;
	    }

	  sum += wptr[i] * gtrcatProtSiteLogTerm(left, right, &diagptable_start[20 * cptr[i]]);
	}
    }
  else
    {
      /* both sides are compressed vectors: gap sites share the gap column,
	 other sites consume 20 doubles each */
      for (i = 0; i < n; i++)
	{
	  if(isGap(x1_gap, i))
	    left = x1_gapColumn;
	  else
	    {
	      left = left_ptr;
	      left_ptr += 20;
	    }

	  if(isGap(x2_gap, i))
	    right = x2_gapColumn;
	  else
	    {
	      right = right_ptr;
	      right_ptr += 20;
	    }

	  sum += wptr[i] * gtrcatProtSiteLogTerm(left, right, &diagptable_start[20 * cptr[i]]);
	}
    }

  return sum;
}
Ejemplo n.º 7
0
void SumProduct::fillUp() {
  colLogLike = -numeric_limits<double>::infinity();
  for (int cpt = 0; cpt < components(); ++cpt) {
    LogThisAt(8,"Sending tip-to-root messages, component #" << cpt << " column " << join(gappedCol,"") << endl);
    cptLogLike[cpt] = 0;
    for (auto r : postorder) {
      logF[cpt][r] = 0;
      for (size_t nc = 0; nc < tree.nChildren(r); ++nc)
	logF[cpt][r] += logE[cpt][tree.getChild(r,nc)];
      if (!isGap(r)) {
	const char c = gappedCol[r];
	if (Alignment::isWildcard(c)) {
	  double Fmax = 0;
	  for (AlphTok i = 0; i < model.alphabetSize(); ++i) {
	    double Fi = 1;
	    for (size_t nc = 0; nc < tree.nChildren(r); ++nc)
	      Fi *= E[cpt][tree.getChild(r,nc)][i];
	    F[cpt][r][i] = Fi;
	    if (Fi > Fmax)
	      Fmax = Fi;
	  }
	  if (Fmax < SUMPROD_RESCALE_THRESHOLD) {
	    for (auto& Fi: F[cpt][r])
	      Fi /= Fmax;
	    logF[cpt][r] += log (Fmax);
	  }
	} else {  // !isWild(r)
	  const AlphTok tok = model.tokenize(c);
	  double Ftok = 1;
	  for (size_t nc = 0; nc < tree.nChildren(r); ++nc)
	    Ftok *= E[cpt][tree.getChild(r,nc)][tok];

	  if (Ftok < SUMPROD_RESCALE_THRESHOLD) {
	    logF[cpt][r] += log (Ftok);
	    Ftok = 1;
	  }

	  for (AlphTok i = 0; i < model.alphabetSize(); ++i)
	    F[cpt][r][i] = 0;
	  F[cpt][r][tok] = Ftok;
	}

	LogThisAt(10,"Row " << setw(3) << r << " " << c << " " << cpt
		  << " logF=" << setw(9) << setprecision(3) << logF[cpt][r]
		  << " F=(" << to_string_join(F[cpt][r]," ",9,3) << ")" << endl);

	const TreeNodeIndex rp = tree.parentNode(r);
	if (rp < 0 || isGap(rp))
	  cptLogLike[cpt] += logF[cpt][r] + log (inner_product (F[cpt][r].begin(), F[cpt][r].end(), insProb[cpt].begin(), 0.));
	else {
	  logE[cpt][r] = logF[cpt][r];
	  for (AlphTok i = 0; i < model.alphabetSize(); ++i) {
	    double Ei = 0;
	    for (AlphTok j = 0; j < model.alphabetSize(); ++j)
	      Ei += branchSubProb[cpt][r][i][j] * F[cpt][r][j];
	    E[cpt][r][i] = Ei;
	  }
	}
      }
    }
    log_accum_exp (colLogLike, logCptWeight[cpt] + cptLogLike[cpt]);
  }
}