Ejemplo n.º 1
0
// Fill `sequence` with one residue per profile position.
//
// `sequence` is reinitialized to profile.length() entries, and entry i is
// set to whatever maximumValueType() returns for the Match emission
// distribution at profile position i (presumably the residue with the
// largest emission value -- confirm against the distribution class).
void
profileToConsensus (
  ProfileType const & profile,
  Sequence<ResidueType> & sequence
)
{
  // Read the length once; it sizes the output and bounds the walk below.
  uint32_t const num_positions = profile.length();
  sequence.reinitialize( num_positions );

  uint32_t column = 0;
  while( column < num_positions ) {
    sequence[ column ] = profile[ column ][ Emission::Match ].maximumValueType();
    ++column;
  }
} // profileToConsensus( ProfileType const &, Sequence & )
/**
 * Copy the parameters of a HMMER3 Plan7 HMM into a galosh profile.
 *
 * The profile is reinitialized to hmm->M positions and zeroed, then filled:
 *  - pre-align transitions and insertion emissions come from HMM node 0;
 *  - Match emissions at profile position i come from hmm->mat[ i + 1 ];
 *  - post-align parameters come from the last HMM node (M);
 *  - the fromMatch / fromInsertion / fromDeletion transitions and the
 *    Insertion emissions are not indexed by position here: the values of
 *    HMM nodes 1 .. M-1 are summed into them, and the final
 *    profile.normalize( 0 ) is relied on to rescale the sums.
 *
 * @param hmm      Source HMM.  Must be non-null with a valid alphabet
 *                 (hmm->abc); this is not checked.
 * @param profile  Destination profile; its previous contents are discarded
 *                 on success.
 * @return eslOK on success;
 *         eslENORESULT if the HMM has no match states (hmm->M <= 0);
 *         eslEINVAL if some ResidueType value does not map to a canonical
 *         symbol of the HMM's alphabet (profile is left untouched).
 */
int
convert_to_galosh_profile ( P7_HMM * hmm, ProfileType & profile )
{
  typedef typename galosh::profile_traits<ProfileType>::ResidueType ResidueType;

  /* How many match states in the HMM? */
  // M is a signed int: a negative value would become an enormous length
  // after the cast to uint32_t below, so reject it along with zero.
  if( hmm->M <= 0 ) { return eslENORESULT; }

  // Digitize every residue once, up front.  The mapping depends only on the
  // alphabet, not on the position, so there is no need to redo it per node.
  const uint32_t residue_count = seqan::ValueSize<ResidueType>::VALUE;
  ESL_DSQ digitized_residues[ seqan::ValueSize<ResidueType>::VALUE ];
  for( uint32_t res_i = 0; res_i < residue_count; res_i++ ) {
    digitized_residues[ res_i ] =
      esl_abc_DigitizeSymbol( hmm->abc, static_cast<char>( ResidueType( res_i ) ) );
    // hmm->mat[ k ] and hmm->ins[ k ] hold one value per canonical symbol
    // (abc->K of them).  Degenerate or illegal codes would index past the
    // end of those rows, so refuse to convert rather than read garbage.
    if( static_cast<int>( digitized_residues[ res_i ] ) >= hmm->abc->K ) {
      return eslEINVAL;
    }
  }

  profile.reinitialize( static_cast<uint32_t>( hmm->M ) );

  // The global parameters below are accumulated with +=, so start from 0.
  profile.zero();

  // NOTE that HMMER3 has a slightly different model, starting in
  // Begin rather than in preAlign, and with 3 legal transitions out
  // of Begin (one of these is to PreAlign).  The galosh profile model
  // begins in preAlign and transitions to Begin, and from there to
  // either Match or Delete.  One implication is that galosh profiles
  // enforce t[ 0 ][ p7H_MI ] to be the same as t[ 0 ][ p7H_II ], but
  // HMMER3 does not.  Another way to say this is that H3 uses affine
  // pre-aligns, and prohibits pre-align -to- delete transitions,
  // whereas galosh / profillic uses non-affine pre-aligns and allows
  // pre-align->delete.

  // fromPreAlign
  profile[ galosh::Transition::fromPreAlign ][ galosh::TransitionFromPreAlign::toPreAlign ] =
    hmm->t[ 0 ][ p7H_II ];
  profile[ galosh::Transition::fromPreAlign ][ galosh::TransitionFromPreAlign::toBegin ] =
    hmm->t[ 0 ][ p7H_IM ];
  for( uint32_t res_i = 0; res_i < residue_count; res_i++ ) {
    profile[ galosh::Emission::PreAlignInsertion ][ res_i ] =
      hmm->ins[ 0 ][ digitized_residues[ res_i ] ];
  }

  // fromBegin
  // Renormalize M->M over the non-insert mass leaving node 0, since the
  // galosh Begin state only goes to Match or Deletion.
  // NOTE(review): if hmm->t[ 0 ][ p7H_MI ] is exactly 1.0 this divides by
  // zero; that case is not handled here.
  profile[ galosh::Transition::fromBegin ][ galosh::TransitionFromBegin::toMatch ] =
    ( hmm->t[ 0 ][ p7H_MM ] / ( 1.0 - hmm->t[ 0 ][ p7H_MI ] ) );
  profile[ galosh::Transition::fromBegin ][ galosh::TransitionFromBegin::toDeletion ] =
    ( 1.0 - profile[ galosh::Transition::fromBegin ][ galosh::TransitionFromBegin::toMatch ] );

  const uint32_t profile_length = profile.length();
  for( uint32_t pos_i = 0; pos_i < profile_length; pos_i++ ) {
    // Profile position pos_i corresponds to HMM node pos_i + 1.
    const uint32_t node_i = pos_i + 1;
    const bool is_last_position = ( pos_i == ( profile_length - 1 ) );

    for( uint32_t res_i = 0; res_i < residue_count; res_i++ ) {
      const ESL_DSQ hmmer_digitized_residue = digitized_residues[ res_i ];
      profile[ pos_i ][ galosh::Emission::Match ][ res_i ] =
        hmm->mat[ node_i ][ hmmer_digitized_residue ];
      if( is_last_position ) {
        // Use post-align insertions
        profile[ galosh::Emission::PostAlignInsertion ][ res_i ] =
          hmm->ins[ node_i ][ hmmer_digitized_residue ];
      } else {
        // Sum into the shared Insertion emission distribution.
        profile[ galosh::Emission::Insertion ][ res_i ] +=
          hmm->ins[ node_i ][ hmmer_digitized_residue ];
      }
    } // End foreach res_i

    if( is_last_position ) {
      // Use post-align insertions
      profile[ galosh::Transition::fromPostAlign ][ galosh::TransitionFromPostAlign::toTerminal ] =
        hmm->t[ node_i ][ p7H_IM ];
      profile[ galosh::Transition::fromPostAlign ][ galosh::TransitionFromPostAlign::toPostAlign ] =
        ( 1.0 - profile[ galosh::Transition::fromPostAlign ][ galosh::TransitionFromPostAlign::toTerminal ] );
    } else {
      // Sum this node's transitions into the shared transition parameters.
      profile[ galosh::Transition::fromMatch ][ galosh::TransitionFromMatch::toMatch ] +=
        hmm->t[ node_i ][ p7H_MM ];
      profile[ galosh::Transition::fromMatch ][ galosh::TransitionFromMatch::toInsertion ] +=
        hmm->t[ node_i ][ p7H_MI ];
      profile[ galosh::Transition::fromMatch ][ galosh::TransitionFromMatch::toDeletion ] +=
        hmm->t[ node_i ][ p7H_MD ];

      profile[ galosh::Transition::fromInsertion ][ galosh::TransitionFromInsertion::toMatch ] +=
        hmm->t[ node_i ][ p7H_IM ];
      profile[ galosh::Transition::fromInsertion ][ galosh::TransitionFromInsertion::toInsertion ] +=
        hmm->t[ node_i ][ p7H_II ];
      profile[ galosh::Transition::fromDeletion ][ galosh::TransitionFromDeletion::toMatch ] +=
        hmm->t[ node_i ][ p7H_DM ];
      profile[ galosh::Transition::fromDeletion ][ galosh::TransitionFromDeletion::toDeletion ] +=
        hmm->t[ node_i ][ p7H_DD ];
    }
  } // End foreach pos_i

  // Normalize with 0 as the minimum value we'll allow.  Note that in
  // profillic and profuse, it's generally 1E-5, so when the profile
  // is read in by those programs, it might be slightly altered.
  profile.normalize( 0 );
  return eslOK;
} // convert_to_galosh_profile (..)