Пример #1
0
 // Rewrites the diagonal of Q so that every row sums to zero: each diagonal
 // entry is replaced by the negated sum of the other entries in its row.
 void setDiagonalRM(matrix_t & Q)
 {
   for (unsigned r = 0; r < Q.size1(); ++ r) {
     // Clear the diagonal entry first so that it does not contribute to the
     // row sum taken on the next line.
     Q(r, r) = 0;
     Q(r, r) = - sum( row (Q, r) );
   }
 }
Пример #2
0
  // Derives the equilibrium frequencies of Q, a quadratic rate ublas::matrix
  // fulfilling detailed balance.
  //
  // A reference state with a non-zero diagonal entry is given weight 1. The
  // remaining states with non-zero diagonal entries are then filled in by
  // repeated calls to deriveEquiFreqs until all of them have a non-zero
  // weight. The weights are finally scaled to sum to one.
  //
  // Calls errorAbort if Q has no non-zero diagonal entry (through
  // nonZeroDiagonalEntryOrDie) or if a pass of deriveEquiFreqs determines no
  // additional state, which would otherwise loop forever.
  vector_t deriveEquiFreqForReversibleRM(matrix_t const & Q)
  {
    assert( Q.size1() == Q.size2() ); // Q is quadratic
    unsigned int size = Q.size1();

    // define refState
    vector_t equiFreq(size);
    equiFreq.clear();
    unsigned refState = nonZeroDiagonalEntryOrDie(Q);
    equiFreq(refState) = 1;

    // calculate un-normalized equi-freqs
    // This code could be cleaned up given conditions on the form of the rate ublas::matrix
    unsigned const statesToDetermine = countNonZeroDiagonalEntries(Q);
    unsigned remaining = statesToDetermine - countNonZeroEntries(equiFreq);
    while ( remaining ) {
      deriveEquiFreqs(Q, equiFreq);

      // any progress? Compare the number of undetermined states before and
      // after the pass; an unchanged count means the loop can never finish.
      unsigned const stillRemaining = statesToDetermine - countNonZeroEntries(equiFreq);
      if (stillRemaining == remaining)
        errorAbort("deriveEquiFreqFromReversibleRM: Bad rateUblas::Matrix (non-reversible?), can't derive equiFreq. Caught in loop");

      remaining = stillRemaining;
    }

    equiFreq *= 1 / sum(equiFreq);
    return equiFreq;
  }
Пример #3
0
std::pair< cvector_t, matrix_t > eig( const matrix_t &mat ) {
    cvector_t vals(  mat.size1() );
    matrix_t right_vecs( mat.size1(), mat.size2() );
    matrix_t * dummy = 0;
    matrix_t temp(mat);
    lapack::geev( temp, vals, dummy, &right_vecs, lapack::optimal_workspace() );
    return std::make_pair( vals, right_vecs ); }
Пример #4
0
 // Constructs the factor from the parameter matrix m and an optional matrix of
 // pseudo counts. The base class is initialised with the type, the id and the
 // dimensions of m. A pseudoCounts matrix with zero rows is accepted as is;
 // otherwise its dimensions are asserted to equal size1_ x size2_.
 AbstractFullyParameterizedFactor::AbstractFullyParameterizedFactor(string const & type, string const & id, matrix_t const & m, matrix_t const & pseudoCounts) 
   : AbstractBaseFactor(type, id, m.size1(), m.size2()), m_(m), pseudoCounts_(pseudoCounts)
 {
   // Nothing to validate when no pseudo counts were supplied.
   if (pseudoCounts.size1() == 0)
     return;

   assert(pseudoCounts.size1() == size1_); 
   assert(pseudoCounts.size2() == size2_); 
 }
Пример #5
0
  // Returns the number of diagonal entries of Q that differ from zero.
  // Q must be quadratic; this is asserted in debug builds, because the loop
  // reads Q(i, i) for every row index i.
  unsigned countNonZeroDiagonalEntries(matrix_t const & Q)
  {
    assert( Q.size1() == Q.size2() ); // Q is quadratic

    unsigned n = 0;
    for (unsigned i = 0; i < Q.size1(); i++)
      if (Q(i, i) != 0.0)
        n++;
    return n;
  }
Пример #6
0
  // Returns the index of the first diagonal entry of Q that differs from zero.
  // Calls errorAbort when every diagonal entry is zero.
  // Q must be quadratic; this is asserted in debug builds, because the loop
  // reads Q(i, i) for every row index i.
  unsigned nonZeroDiagonalEntryOrDie(matrix_t const & Q)
  {
    assert( Q.size1() == Q.size2() ); // Q is quadratic

    for (unsigned i = 0; i < Q.size1(); i++)
      if (Q(i, i) != 0.0)
        return i;

    errorAbort("All zero diagonal entries. Bails out.");
    return 0; // will never reach this, only to quiet compiler
  }
Пример #7
0
// Scatters the entries of m into an n_original_states x n_original_states
// matrix that starts out all zero: entry (i, j) of m is written to position
// (lift[i], lift[j]) of the result. Positions not addressed through `lift`
// stay zero.
matrix_t SubHMM::lift_transition(const matrix_t &m) const {
  matrix_t lifted = zero_matrix(n_original_states, n_original_states);

  for (size_t src_row = 0; src_row < m.size1(); src_row++) {
    for (size_t src_col = 0; src_col < m.size2(); src_col++) {
      lifted(lift[src_row], lift[src_col]) = m(src_row, src_col);
    }
  }

  return lifted;
}
Пример #8
0
// QR Factorization of a MxN General Matrix A.
//    a       (IN/OUT - matrix(M,N)) On entry, the coefficient matrix A. On exit, the upper triangle and diagonal is the min(M,N) by N upper triangular matrix R.  The lower triangle, together with the tau vector, is the orthogonal matrix Q as a product of min(M,N) elementary reflectors.
//    tau     (OUT - vector (min(M,N))) Vector of the same numerical type as A. The scalar factors of the elementary reflectors. Must hold at least min(M,N) elements.
//    return value (int) - the LAPACK info code, or a wrapper-level error:
//   0    : function completed normally
//   < 0  : The ith argument, where i = abs(return value) had an illegal value.
//   -104 : tau holds fewer than min(M,N) elements; LAPACK is not called.
int geqrf (matrix_t& a, vector_t& tau)
{
	int              _m = int(a.size1());
	int              _n = int(a.size2());
	// The leading dimension is taken to be the row count.
	// NOTE(review): this presumes column-major storage of matrix_t - confirm.
	int              _lda = int(a.size1());
	int              _info = 0;

	// make_sure tau's size is greater than or equal to min(m,n)
	if (int(tau.size()) < (_n<_m ? _n : _m) )
		return -104;

	// LAPACK requires LWORK >= max(1,N). N*N is kept as the workspace size,
	// with a floor of 1 so that a matrix with zero columns is not rejected
	// for an illegal workspace length.
	int ldwork = (_n > 0) ? _n*_n : 1;
	vector_t dwork(ldwork);
	rawLAPACK::geqrf (_m, _n, a.data().begin(), _lda, tau.data().begin(), dwork.data().begin(), ldwork, _info);

	return _info;
}
Пример #9
0
  // Performs one pass over all states, filling in equilibrium weights that
  // are still zero. A state `target` is considered only if its weight is zero
  // and Q(target, target) is non-zero. Its weight is then computed from every
  // state `source` that already has a non-zero weight and a non-zero rate
  // Q(target, source), as
  //   equiFreq(source) * Q(source, target) / Q(target, source).
  // The inner scan does not stop at the first match, so the value from the
  // last qualifying source is the one that remains.
  void deriveEquiFreqs(matrix_t const & Q, vector_t & equiFreq)
  {
    unsigned stateCount = Q.size1();

    for (unsigned int target = 0; target < stateCount; target++) {
      // Skip states that are already determined or have a zero diagonal.
      if (equiFreq(target) != 0 || Q(target, target) == 0.0)
        continue;

      for (unsigned int source = 0; source < stateCount; source++) {
        // A source is usable only if it has a weight and a non-zero rate
        // from target to source (the divisor below).
        if (equiFreq(source) == 0.0 || Q(target, source) == 0.0)
          continue;

        equiFreq(target) = equiFreq(source) * Q(source, target) / Q(target, source);
      }
    }
  }
Пример #10
0
  // Rescales Q in place by subs / normConst, where normConst is the sum over
  // all entries (i, j) of equiFreq(i) * Q(i, j) * hammingDistance(i, j).
  // equiFreq comes from deriveEquiFreqForReversibleRM(Q) and the distance
  // functor is built from staMap.
  // NOTE(review): a normConst of zero is not guarded against.
  void normalizeRM(matrix_t & Q, StateMap const & staMap, float subs)
  {
    vector_t stationary = deriveEquiFreqForReversibleRM(Q);
    HammingDistance distance(staMap);

    // Accumulate the weighted sum in row-major order.
    number_t weightedSum = 0;
    for (unsigned from = 0; from < Q.size1(); ++ from) {
      for (unsigned to = 0; to < Q.size2(); ++ to) {
        weightedSum += stationary(from) * Q(from,to) * distance(from,to);
      }
    }

    Q = Q * (subs / weightedSum);
  }
Пример #11
0
// Writes M to `out` as a brace-initialiser style listing named
// "matrix_t M[n]": one "\t{ a,b,c },\n" line per row, wrapped in
// "matrix_t M[n] = {\n" and "}\n".
//   out  stream that receives the text
//   M    matrix to print (only read)
//   n    number printed between the square brackets of the heading
// A matrix with rows but no columns prints empty braces for each row; no
// element is read in that case.
void output_matrix(ostream& out, matrix_t& M, unsigned n) {
	out << "matrix_t M[" << n << "] = {\n";
	for (unsigned i=0; i<M.size1(); ++i) {
		out << "\t{ ";
		for (unsigned j=0; j<M.size2() ; ++j) {
			// Separator goes before every element except the first.
			if (j != 0)
				out << ',';
			out << M(i,j);
		}
		out << " },\n";
	}
	out << "}\n";
}
Пример #12
0
// LU factorization of a general matrix A.  
//    Computes an LU factorization of a general M-by-N matrix A using
//    partial pivoting with row interchanges. Factorization has the form
//    A = P*L*U.
//    a       (IN/OUT - matrix(M,N)) On entry, the coefficient matrix A to be factored. On exit, the factors L and U from the factorization A = P*L*U.
//    ipivot  (OUT - vector(min(M,N))) Integer vector. The row i of A was interchanged with row IPIV(i). Must hold at least min(M,N) entries.
//    return value (int) - the LAPACK info code, or a wrapper-level error:
//   0    :  successful exit
//   < 0  :  If INFO = -i, then the i-th argument had an illegal value.
//   -102 :  ipivot holds fewer than min(M,N) entries; LAPACK is not called.
//   > 0  :  If INFO = i, then U(i,i) is exactly zero. The  factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
int getrf (matrix_t& a, pivot_t& ipivot)
{
	matrix_t::value_type* _a = a.data().begin();
	int _m = int(a.size1());
	int _n = int(a.size2());
	int _lda = _m;	// minor size
	int _info = 0;

	// LAPACK writes min(M,N) pivot indices; refuse a buffer that is too
	// small instead of letting the routine write past its end.
	int _min_mn = (_n < _m) ? _n : _m;
	if (int(ipivot.size()) < _min_mn)
		return -102;

	rawLAPACK::getrf (_m, _n,	_a, _lda, ipivot.data().begin(), _info);

	return _info;
}
Пример #13
0
// Solution to a system using LU factorization 
//   Solves a system of linear equations A*X = B with a general NxN
//   matrix A using the LU factorization computed by GETRF.
//   transa  (IN - char)  'T' for the transpose of A, 'N' otherwise.
//   a       (IN - matrix(M,N)) The factors L and U from the factorization A = P*L*U as computed by GETRF.
//   ipivot  (IN - vector(min(M,N))) Integer vector. The pivot indices from GETRF; row i of A was interchanged with row IPIV(i).
//   b       (IN/OUT - matrix(ldb,NRHS)) Matrix of same numerical type as A. On entry, the right hand side matrix B. On exit, the solution matrix X.
//
//   info    (OUT - int)
//   0   : function completed normally
//   < 0 : The ith argument, where i = abs(return value) had an illegal value.
//   > 0 : if INFO =  i,  U(i,i)  is  exactly  zero;  the  matrix is singular and its inverse could not be computed.
int getrs (char transa, matrix_t& a,
	    pivot_t& ipivot, matrix_t& b)
{
	matrix_t::value_type* _a = a.data().begin();
	int a_n = int(a.size1());
	int _lda = a_n;
	int p_n = int(ipivot.size());

	matrix_t::value_type* _b = b.data().begin();
	int b_n = int(b.size1());
	int _ldb = b_n;
	int _nrhs = int(b.size2()); /* B's size2 is the # of vectors on rhs */

	if (a_n != b_n) /*Test to see if AX=B has correct dimensions */
		return -101;
	if (p_n < a_n)     /*Check to see if ipivot is big enough */
		return -102;

	int _info;
	rawLAPACK::getrs (transa, a_n, _nrhs, _a,	_lda, ipivot.data().begin(), 
				_b, _ldb, _info);

	return _info;
} 
Пример #14
0
// Tests whether the group's motif is relatively more frequent in the data
// sets that list it than in those that do not.
// Each row i of `counts` is assigned to the "signal" side when
// contrast.sets[i].motifs contains the name of groups[group_idx], and to the
// "control" side otherwise. Per side, column 0 is summed as the hit count and
// column 0 + column 1 as the total.
// Returns true iff signal / total_signal > control / total_control.
// NOTE(review): a side with a zero total yields a division by zero here.
bool HMM::check_enrichment(const Data::Contrast &contrast,
                           const matrix_t &counts, size_t group_idx) const {
  const string &motif = groups[group_idx].name;

  double signal_hits = 0, signal_total = 0;
  double control_hits = 0, control_total = 0;

  for (size_t row = 0; row < counts.size1(); row++) {
    const bool has_motif = contrast.sets[row].motifs.find(motif)
                           != contrast.sets[row].motifs.end();

    // Pick the pair of accumulators this row contributes to.
    double &hits = has_motif ? signal_hits : control_hits;
    double &total = has_motif ? signal_total : control_total;

    hits += counts(row, 0);
    total += counts(row, 0) + counts(row, 1);
  }

  return signal_hits / signal_total > control_hits / control_total;
}
Пример #15
0
// Accumulates, over all sequences of `dataset`, gradient matrices built from
// the full model and from a reduced model (a SubHMM constructed from
// complementary_states_mask(present)).
//
//   dataset       sequences to process
//   task          supplies the transition and emission targets
//   present       bit mask passed to complementary_states_mask
//   transition_g  (OUT) reset to zero and accumulated into only when
//                 task.targets.transition is non-empty; otherwise untouched
//   emission_g    (OUT) same, governed by task.targets.emission
//
// Returns {l, posterior}, where l is the sum of the full-model logp values
// and posterior is the sum of 1 - exp(logpr - logp) over all sequences.
//
// The per-sequence loop runs inside an OpenMP parallel region (enabled by
// DO_PARALLEL). Each thread adds into its own slot of t_g / e_g, and the slots
// are summed into the output matrices by a single thread afterwards.
HMM::posterior_t HMM::posterior_gradient(const Data::Set &dataset,
                                         const Training::Task &task,
                                         bitmask_t present,
                                         matrix_t &transition_g,
                                         matrix_t &emission_g) const {
  // Training::Task task(task_);
  if (verbosity >= Verbosity::verbose)
    cout << "Posterior gradient calculation (Feature)." << endl;

  // Reduced model used alongside the full model for every sequence.
  SubHMM subhmm(*this, complementary_states_mask(present));

  // cout << subhmm << endl;

  if (verbosity >= Verbosity::debug) {
    cout << "Transition targets are";
    for (auto x : task.targets.transition)
      cout << " " << x;
    cout << endl;
    cout << "Emission targets are";
    for (auto x : task.targets.emission)
      cout << " " << x;
    cout << endl;
  }

  // Output matrices are only reset for target kinds that are requested.
  if (not task.targets.transition.empty())
    transition_g = zero_matrix(n_states, n_states);
  if (not task.targets.emission.empty())
    emission_g = zero_matrix(n_states, n_emissions);
  double posterior = 0;
  // One accumulator matrix per thread; sized inside the parallel region.
  vector<matrix_t> t_g, e_g;

  // Targets translated for the reduced model.
  Training::Targets reduced_targets = subhmm.map_down(task.targets);

  if (verbosity >= Verbosity::debug) {
    cout << "targets emission = ";
    for (auto &x : task.targets.emission)
      cout << " " << x;
    cout << endl;
    cout << "targets transition = ";
    for (auto &x : task.targets.transition)
      cout << " " << x;
    cout << endl;
    cout << "reduced targets emission = ";
    for (auto &x : reduced_targets.emission)
      cout << " " << x;
    cout << endl;
    cout << "reduced targets transition = ";
    for (auto &x : reduced_targets.transition)
      cout << " " << x;
    cout << endl;
  }

  // Sum of the full-model log probabilities.
  double l = 0;

#pragma omp parallel shared(emission_g, transition_g) if (DO_PARALLEL)
  {
#pragma omp single
    // Initialize storage for thread intermediate results
    {
      size_t n_threads = omp_get_num_threads();
      //    cout << "B Num threads = " << n_threads << endl;
      if (not task.targets.transition.empty())
        t_g = vector<matrix_t>(
            n_threads, zero_matrix(transition_g.size1(), transition_g.size2()));
      if (not task.targets.emission.empty())
        e_g = vector<matrix_t>(
            n_threads, zero_matrix(emission_g.size1(), emission_g.size2()));
    }

#pragma omp for reduction(+ : posterior, l)
    // Compute gradient for each sequence
    for (size_t i = 0; i < dataset.sequences.size(); i++) {
      // Index of this thread's private accumulators in t_g / e_g.
      int thread_idx = omp_get_thread_num();
      if (verbosity >= Verbosity::debug)
        cout << "Thread " << thread_idx << " Data sample " << i << endl
             << seq2string(dataset.sequences[i].isequence) << endl;

      // Compute expected statistics, for the full and reduced models
      matrix_t T, Tr, E, Er;
      double logp
          = BaumWelchIteration_single(T, E, dataset.sequences[i], task.targets);
      double logpr = subhmm.BaumWelchIteration_single(
          Tr, Er, dataset.sequences[i], reduced_targets);

      // Bring the reduced-model statistics to the dimensions of the full
      // model so they can be combined with T and E below.
      Tr = subhmm.lift_transition(Tr);
      Er = subhmm.lift_emission(Er);

      if (verbosity >= Verbosity::debug)
        cout << "Full logp = " << logp << endl << "Reduced logp = " << logpr
             << endl << "Expected transitions full = " << T << endl
             << "Expected emissions full = " << E << endl
             << "Expected transitions constitutive_range = " << Tr << endl
             << "Expected emissions constitutive_range = " << Er << endl;

      if (not task.targets.transition.empty()) {
        // Compute log likelihood gradients for the full model w.r.t. transition
        // probability
        matrix_t t = transition_gradient(T, task.targets.transition);
        // Compute log likelihood gradients for the reduced model w.r.t.
        // transition probability
        matrix_t tr = transition_gradient(Tr, task.targets.transition);

        // Accumulate the difference of the two gradients, weighted by
        // exp(logpr - logp), into this thread's transition accumulator
        t_g[thread_idx] += exp(logpr - logp) * (t - tr);
      }

      if (not task.targets.emission.empty()) {
        // Compute log likelihood gradients for the full model w.r.t. emission
        // probability
        matrix_t e = emission_gradient(E, task.targets.emission);
        // Compute log likelihood gradients for the reduced model w.r.t.
        // emission probability
        matrix_t er = emission_gradient(Er, task.targets.emission);

        // Accumulate the difference of the two gradients, weighted by
        // exp(logpr - logp), into this thread's emission accumulator
        e_g[thread_idx] += exp(logpr - logp) * (e - er);
      }

      // Both sums are OpenMP reduction variables of the enclosing loop.
      posterior += 1 - exp(logpr - logp);
      l += logp;
    }

#pragma omp single
    // Collect results of threads
    {
      if (not task.targets.transition.empty())
        for (auto &x : t_g)
          transition_g += x;
      if (not task.targets.emission.empty())
        for (auto &x : e_g)
          emission_g += x;
    }
  }

  if (verbosity >= Verbosity::verbose)
    cout << "The posterior coming from the gradient calculus: " << posterior
         << endl;
  posterior_t result = {l, posterior};
  return result;
}
Пример #16
0
// Computes the summed log likelihood of `seqs` together with its gradient
// matrices.
//
//   seqs          sequences to process
//   targets       transition / emission targets; a gradient is accumulated
//                 only for the kinds whose target list is non-empty
//   transition_g  (OUT) always reset to an n_states x n_states zero matrix,
//                 then incremented by transition_gradient(...) per sequence
//   emission_g    (OUT) always reset to an n_states x n_emissions zero
//                 matrix, then incremented by emission_gradient(...) per
//                 sequence
//
// Returns the sum of the values returned by BaumWelchIteration_single.
//
// The per-sequence loop runs inside an OpenMP parallel region (enabled by
// DO_PARALLEL). Each thread adds into its own slot of t_g / e_g, and the slots
// are summed into the output matrices by a single thread afterwards.
double HMM::log_likelihood_gradient(const Data::Seqs &seqs,
                                    const Training::Targets &targets,
                                    matrix_t &transition_g,
                                    matrix_t &emission_g) const {
  transition_g = zero_matrix(n_states, n_states);
  emission_g = zero_matrix(n_states, n_emissions);

  double lp = 0;
  // One accumulator matrix per thread; sized inside the parallel region.
  vector<matrix_t> t_g, e_g;

#pragma omp parallel shared(emission_g, transition_g) if (DO_PARALLEL)
  {
#pragma omp single
    // Initialize storage for thread intermediate results
    {
      size_t n_threads = omp_get_num_threads();
      //    cout << "B Num threads = " << n_threads << endl;
      if (not targets.transition.empty())
        t_g = vector<matrix_t>(
            n_threads, zero_matrix(transition_g.size1(), transition_g.size2()));
      if (not targets.emission.empty())
        e_g = vector<matrix_t>(
            n_threads, zero_matrix(emission_g.size1(), emission_g.size2()));
    }

#pragma omp for reduction(+ : lp)
    // Compute likelihood for each sequence
    for (size_t i = 0; i < seqs.size(); i++) {
      // Index of this thread's private accumulators in t_g / e_g.
      int thread_idx = omp_get_thread_num();

      // Compute expected statistics. The forward and backward matrices that
      // used to be computed here as well were never read, so that work has
      // been dropped.
      matrix_t T, E;
      double logp = BaumWelchIteration_single(T, E, seqs[i], targets);

      if (not targets.transition.empty())
        // Compute log likelihood gradients w.r.t. transition probability
        t_g[thread_idx] += transition_gradient(T, targets.transition);

      if (not targets.emission.empty())
        // Compute log likelihood gradients w.r.t. emission probability
        e_g[thread_idx] += emission_gradient(E, targets.emission);

      lp += logp;
    }

#pragma omp single
    // Collect results of threads
    {
      if (not targets.transition.empty())
        for (auto &x : t_g)
          transition_g += x;
      if (not targets.emission.empty())
        for (auto &x : e_g)
          emission_g += x;
    }
  }

  return lp;
}