示例#1
0
/// Count the columns in which both sequences have a character.
///
/// \param A  The alignment
/// \param s1 Index of the first sequence
/// \param s2 Index of the second sequence
/// \return The number of columns where A.character() holds for both s1 and s2.
unsigned n_homologous(const alignment& A,int s1,int s2) 
{
  unsigned n_shared = 0;

  for(int column=0; column<A.length(); column++)
  {
    // Skip any column where at least one of the two sequences lacks a character.
    if (not A.character(column,s1)) continue;
    if (not A.character(column,s2)) continue;

    n_shared++;
  }

  return n_shared;
}
示例#2
0
unsigned n_with_identity(const alignment& A,int s1,int s2,double I)
{
  // Get matches
  vector<int> F(A.length()+1);

  unsigned L=0;
  unsigned T = 0;
  F[0]=0;
  for(int i=0;i<A.length();i++) 
  {
    if (not A.character(i,s1) and not A.character(i,s2)) continue;

    L++;
    
    if (A(i,s1) == A(i,s2))
      T++;

    F[L] = T;
  }
  F.resize(L+1);

  // Get positions
  vector<int> FI(T+1);
  FI[0]=0;
  for(int i=0;i<L;i++)
    if (F[i+1] > F[i])
      FI[F[i+1]] = i+1;

  // tag positions that 
  vector<int> tagged(L,0);

  const unsigned w = 4;
  for(int i=1;i<=T;i++) {
    for(int j=20;j>=w;j--) {
      int i2 = i+j;
      if (i2 > T) continue;
      assert(FI[i]  > 0 and FI[i]  <=L);
      assert(FI[i2] > 0 and FI[i2] <=L);
      assert(FI[i2] > FI[i]);

      if (double(i2-i+1)/(FI[i2]-FI[i]+1) > I) {
	for(int k=FI[i];k<=FI[i2];k++)
	  tagged[k-1]=1;
	break;
      }
    }
  }

  return sum(tagged);
}
示例#3
0
/// Collect the letters of one alignment row, leaving out non-character entries.
///
/// \param A The alignment
/// \param i Index of the sequence (row) to extract
/// \return The values A(column,i), in column order, for every column where
///         A.character(column,i) holds.
vector<int> alignment_row_letters(const alignment& A, int i)
{
  vector<int> letters;

  for(int column=0; column<A.length(); column++)
  {
    if (not A.character(column,i)) continue;

    letters.push_back(A(column,i));
  }

  return letters;
}
示例#4
0
/// Count the sequences that have a character in a given column.
///
/// \param A      The alignment
/// \param column Index of the column to inspect
/// \return The number of sequences i for which A.character(column,i) holds.
int n_characters(const alignment& A, int column) 
{
  int total = 0;

  for(int seq=0; seq<A.n_sequences(); seq++)
    total += A.character(column,seq) ? 1 : 0;

  return total;
}
示例#5
0
/// Fraction of columns shared by two sequences.
///
/// Only columns where at least one of the two sequences has a character are
/// considered; the result is the share of those columns in which both do.
///
/// \param A  The alignment
/// \param s1 Index of the first sequence
/// \param s2 Index of the second sequence
/// \return (columns with both) / (columns with either), or 1 when no column
///         has a character in either sequence.
double fraction_homologous(const alignment& A,int s1,int s2) 
{
  unsigned n_either = 0;
  unsigned n_both   = 0;

  for(int column=0; column<A.length(); column++)
  {
    const bool in1 = A.character(column,s1);
    const bool in2 = A.character(column,s2);

    if (in1 or in2)  n_either++;
    if (in1 and in2) n_both++;
  }

  // With nothing to compare, report full homology rather than dividing by zero.
  if (n_either == 0)
    return 1;

  return double(n_both)/n_either;
}
示例#6
0
/// Build a matrix in which each letter is replaced by its position in its sequence.
///
/// The result has A1.length() rows and A1.n_sequences() columns.  Where
/// sequence i has a character in a column, the entry is the 0-based count of
/// characters already seen in that sequence; every other entry is copied
/// unchanged from A1.
///
/// \param A1 The alignment to convert
/// \return The matrix of letter positions.
ublas::matrix<int> M(const alignment& A1) 
{
  ublas::matrix<int> positions(A1.length(),A1.n_sequences());

  for(int seq=0; seq<A1.n_sequences(); seq++)
  {
    // Index that the next character found in this sequence will receive.
    int next = 0;

    for(int column=0; column<A1.length(); column++)
    {
      if (A1.character(column,seq))
        positions(column,seq) = next++;
      else
        positions(column,seq) = A1(column,seq);
    }

    // Every letter of the sequence must have been numbered exactly once.
    assert(next == A1.seqlength(seq));
  }

  return positions;
}
示例#7
0
/// Construct, for each of the first nleaves sequences, the list of columns holding its letters.
///
/// \param A       The alignment
/// \param nleaves Number of sequences to process; -1 means all A.n_sequences() of them
/// \return result[i][k] is the column of the k-th character of sequence i
///         (columns where A.character(column,i) holds, in increasing order).
vector< vector<int> > column_lookup(const alignment& A,int nleaves) 
{
  const int n_rows = (nleaves == -1) ? A.n_sequences() : nleaves;

  vector< vector<int> > result(n_rows);

  for(int seq=0; seq<n_rows; seq++)
  {
    // A sequence cannot have more letters than the alignment has columns.
    result[seq].reserve(A.length());

    for(int column=0; column<A.length(); column++)
    {
      if (not A.character(column,seq)) continue;

      result[seq].push_back(column);
    }
  }

  return result;
}
示例#8
0
/// \brief Check if internal node characters are only present between leaf characters.
///
/// For every column, a presence mask is built from the alignment, passed
/// through connect_all_characters(), and then compared against the alignment
/// at the internal nodes (indices T.n_leaves() .. T.n_nodes()-1).
///
/// \param A The alignment; must have one sequence per node of T
/// \param T The tree
/// \return false as soon as some internal node's mask bit differs from
///         A.character() in some column, true otherwise.
bool check_leaf_characters_minimally_connected(const alignment& A,const Tree& T)
{
  assert(A.n_sequences() == T.n_nodes());

  for(int column=0; column<A.length(); column++)
  {
    // Seed the mask with every node whose entry in this column is not a gap.
    // NOTE(review): the mask is seeded from all nodes, not only the leaves --
    // confirm this is what connect_all_characters() expects here.
    dynamic_bitset<> present(T.n_nodes());
    for(int node=0; node<T.n_nodes(); node++)
      present[node] = not A.gap(column,node);

    // Let the helper work out presence/absence for the internal nodes.
    connect_all_characters(T,present);

    // The alignment must agree with the computed mask at every internal node.
    for(int node=T.n_leaves(); node<T.n_nodes(); node++)
    {
      if (present[node] != A.character(column,node))
        return false;
    }
  }

  return true;
}
示例#9
0
/// Check whether no sequence has a character in the given column.
///
/// \param A      The alignment
/// \param column Index of the column to inspect
/// \return true if A.character(column,i) is false for every sequence i.
bool all_gaps(const alignment& A,int column) {
  // Advance past sequences without a character; stopping early means one was found.
  int seq = 0;
  while (seq < A.n_sequences() and not A.character(column,seq))
    seq++;

  return seq >= A.n_sequences();
}
示例#10
0
/// Check whether no selected sequence has a character in the given column.
///
/// \param A      The alignment
/// \param column Index of the column to inspect
/// \param mask   Sequences to consider; sequence i is examined only if mask[i] is set
/// \return true if A.character(column,i) is false for every sequence i with mask[i] set.
bool all_gaps(const alignment& A,int column,const boost::dynamic_bitset<>& mask) {
  for(int seq=0; seq<A.n_sequences(); seq++)
  {
    // Sequences outside the mask never affect the answer.
    if (not mask[seq]) continue;

    if (A.character(column,seq))
      return false;
  }

  return true;
}