/// \brief Construct an alignment between multiple lists of residues by pulling together residues of the same name. /// /// Does this code necessarily need to construct an alignment /// (as opposed to just identifying the coordinates to superpose)? /// /// Advantages: /// - It helps to illustrate how things are being superposed /// - It makes it easier to check for (and disallow) conflicts in the ordering of residues /// - It makes it possible to SSAP-score the alignment and then perform a weighted superposition /// /// Disadvantages: /// - It makes things a bit more complicated /// - Any decisions that have to be taken about how to align the non-matching residues are arbitrary /// (but may misinterpreted as meaningful). /// /// Where there are choices about which entry's position to add first: /// - add the longest entry's position first (helps to give consistent answers for testing) /// - add the first entry's position first alignment residue_name_aligner::residue_name_align(const residue_name_vec_vec &arg_residue_lists ///< TODOCUMENT ) { // Check that there is at least one list const str_vec_vec::size_type num_lists = arg_residue_lists.size(); if (num_lists < 1) { BOOST_THROW_EXCEPTION(invalid_argument_exception("Cannot residue_name_align() zero residue lists")); } // Check that at least one of the lists is not empty // \todo Change this to use C++11 any_of() and use a lambda to check for not empty bool found_non_empty = false; for (const residue_name_vec &arg_residue_list : arg_residue_lists) { if ( ! arg_residue_list.empty() ) { found_non_empty = true; } } if ( ! found_non_empty ) { BOOST_THROW_EXCEPTION(invalid_argument_exception("Cannot residue_name_align() residue lists that are all empty")); } // Build a vector of residue_name_align_map objects, one for each arg_residue_list vector<residue_name_align_map> maps; maps.reserve(num_lists); for (const residue_name_vec &residue_list : arg_residue_lists) { maps.push_back( residue_name_align_map( make_residue_name_align_map( residue_list ) ) ); } // Data structures to create the alignment: size_vec next_index_to_add_for_lists(num_lists, 0); opt_aln_posn_vec_vec raw_alignment_data( num_lists ); // Do the actual work of building an alignment bool more_to_do = true; while (more_to_do) { more_to_do = false; // This is made a bit more complicated to ensure that the longer entry's position // is inserted first where there is a choice bool found_non_skipping = false; size_t entry_of_non_skipping = 0; // Search to find an entry for which a new entry can be inserted into the alignment for (size_t entry_ctr = 0; entry_ctr < num_lists; ++entry_ctr) { // If this entry is complete, then continue to the next entry // otherwise, grab the next residue string for this entry // and record that there's more_to_do const size_t &next_index_to_add = next_index_to_add_for_lists[entry_ctr]; if ( next_index_to_add >= arg_residue_lists[ entry_ctr ].size() ) { continue; } const residue_name &the_res_name = arg_residue_lists[entry_ctr][next_index_to_add]; more_to_do = true; // If a better or equal non-skipping has already been found (ie for an entry that's at least as long as this one) // then no point considering this one so continue to next pass of loop if (found_non_skipping && arg_residue_lists[entry_of_non_skipping].size() >= arg_residue_lists[entry_ctr].size()) { continue; } // Find which entries have equivalent residues and the indices of those equivalents opt_aln_posn_vec equivalent_indices; equivalent_indices.reserve( num_lists ); for (const residue_name_align_map &map : maps) { const opt_aln_posn value = contains_residue_name( map, the_res_name ) ? opt_aln_posn( get_index_of_residue_name( map, the_res_name ) ) : opt_aln_posn( none ); equivalent_indices.push_back( value ); } // Check whether inserting this row of of equivalents would involve skipping anything bool found_skip_here = false; for (size_t entry_check_ctr = 0; entry_check_ctr < num_lists; ++entry_check_ctr) { const opt_aln_posn &position = equivalent_indices[entry_check_ctr]; if ( position && *position != next_index_to_add_for_lists[ entry_check_ctr ] ) { if ( *position < next_index_to_add_for_lists[entry_check_ctr]) { BOOST_THROW_EXCEPTION( invalid_argument_exception( "Whilst aligning residue names, residue " + lexical_cast<string>( the_res_name ) + " is out of order (in entry " + lexical_cast<string>( entry_ctr ) + " and entry " + lexical_cast<string>( entry_check_ctr ) + " around indices " + lexical_cast<string>( *position ) + " and " + lexical_cast<string>( next_index_to_add_for_lists[ entry_check_ctr ] ) + ")" ) ); } found_skip_here = true; break; } } // If we've found a skip here then it's better than before If we've found a better skip than before then record it if ( ! found_skip_here ) { entry_of_non_skipping = entry_ctr; found_non_skipping = true; } else { } } // If this entry is ready to be inserted, then proceed and break out of this loop if ( found_non_skipping ) { const residue_name &the_res_name = arg_residue_lists[entry_of_non_skipping][next_index_to_add_for_lists[entry_of_non_skipping]]; // Find which entries have equivalent residues bool_deq equivalent_presences; for (const residue_name_align_map &map : maps) { equivalent_presences.push_back( contains_residue_name( map, the_res_name ) ); } // Insert the new positions and increment the relevant indices in next_index_to_add_for_lists for (size_t entry_check_ctr = 0; entry_check_ctr < num_lists; ++entry_check_ctr) { const bool &should_insert_entry = equivalent_presences[ entry_check_ctr ]; const opt_aln_posn value = should_insert_entry ? next_index_to_add_for_lists[ entry_check_ctr ] : opt_aln_posn( none ); raw_alignment_data[ entry_check_ctr ].push_back( value ); if ( should_insert_entry ) { ++( next_index_to_add_for_lists[ entry_check_ctr ] ); } } } if ( more_to_do && ! found_non_skipping ) { BOOST_THROW_EXCEPTION(invalid_argument_exception("Unable to align residue names, this probably means the residue ordering is not consistent")); } } // Create and return an alignment from the data return alignment( raw_alignment_data ); }
/// \brief Checks that residue_name_aligner::residue_name_align() does the correct thing for all permutations of the residue lists. /// /// This should be accessed via check_residue_name_aligner_results() or check_residue_name_aligner_throws() /// /// This subroutine is mostly ready to handle more than two lists. /// /// \todo modify the actual call to residue_name_aligner::residue_name_align() to handle multiple lists /// (after that subroutine has been altered to handle multiple lists) void do_check_residue_name_aligner(const residue_name_vec_vec &arg_residue_lists, const bool_deq_vec &arg_correct_presence_lists, const size_vec_vec &arg_correct_answer_lists, const bool &arg_should_throw ) { const size_t num_lists = arg_residue_lists.size(); BOOST_REQUIRE_EQUAL(num_lists, 2_z); /// This code isn't yet able to process more than two at a time if (!arg_should_throw) { BOOST_REQUIRE_EQUAL(num_lists, arg_correct_presence_lists.size()); BOOST_REQUIRE_EQUAL(num_lists, arg_correct_answer_lists.size()); } // Construct a vector containing the indices of the lists size_vec permutation_indices(num_lists, 0); for (size_t index_ctr = 0; index_ctr < num_lists; ++index_ctr) { permutation_indices[index_ctr] = index_ctr; } // Loop over the permutations of the indices do { // If these residue lists should cause residue_name_aligner::residue_name_align() to throw then check they do if (arg_should_throw) { BOOST_CHECK_THROW( residue_name_aligner::residue_name_align( { arg_residue_lists[ permutation_indices[ 0 ] ], arg_residue_lists[ permutation_indices[ 1 ] ] } ), invalid_argument_exception ); } // Otherwise check the results from residue_name_aligner::residue_name_align() else { // Construct an alignment from this permutation of residue lists const alignment my_alignment = residue_name_aligner::residue_name_align( { arg_residue_lists[permutation_indices[ 0 ]], arg_residue_lists[permutation_indices[ 1 ]] } ); const alignment::size_type num_positions = my_alignment.length(); // Check each of the alignment entries in turn for (size_t index_ctr = 0; index_ctr < num_lists; ++index_ctr) { // Grab the correct answer list under the current permutation const size_t permutation_index = permutation_indices[index_ctr]; const bool_deq &correct_presence_list = arg_correct_presence_lists[permutation_index]; const size_vec &correct_answer_list = arg_correct_answer_lists[permutation_index]; const size_t correct_answer_size = correct_answer_list.size(); // Check that the number of positions match BOOST_CHECK_EQUAL(correct_answer_size, num_positions); // Check that each of the positions in the alignment match what is expected for (size_t position_ctr = 0; position_ctr < min(correct_answer_size, num_positions); ++position_ctr) { const aln_posn_opt position = my_alignment.position_of_entry_of_index( index_ctr, position_ctr ); const bool has_position = static_cast<bool>( position ); BOOST_CHECK_EQUAL( correct_presence_list[position_ctr], has_position ); if ( position ) { BOOST_CHECK_EQUAL(correct_answer_list[position_ctr], *position ); } } } } } while ( next_permutation( permutation_indices ) ); }