Пример #1
0
/// \brief Build an alignment across several lists of residue names by putting identically-named residues in the same column.
///
/// Is it really necessary for this code to build a full alignment
/// (rather than merely identifying the coordinates to superpose)?
///
/// Advantages:
///  - It helps to illustrate how things are being superposed
///  - It makes it easier to check for (and disallow) conflicts in the ordering of residues
///  - It makes it possible to SSAP-score the alignment and then perform a weighted superposition
///
/// Disadvantages:
///  - It makes things a bit more complicated
///  - Any decisions that have to be taken about how to align the non-matching residues are arbitrary
///    (but may be misinterpreted as meaningful).
///
/// On each pass, one residue name is chosen and a new column is appended: every entry containing
/// that name contributes its next index; every other entry gets an empty position.
/// A name may only be chosen if, in every entry that contains it, it is that entry's next
/// residue still to be added (ie adding it would not skip over anything).
///
/// Where more than one entry offers a usable name:
///  - the entry with the longest list wins (helps to give consistent answers for testing)
///  - ties are broken in favour of the earliest entry
///
/// \throws invalid_argument_exception if there are no lists, if every list is empty,
///         if a residue name is found to be out of order between two entries or
///         if no residue can be added even though some entries are incomplete
alignment residue_name_aligner::residue_name_align(const residue_name_vec_vec &arg_residue_lists ///< The lists of residue names to be aligned, one list per alignment entry
                                                   ) {
	// There must be at least one list...
	const str_vec_vec::size_type num_lists = arg_residue_lists.size();
	if ( num_lists < 1 ) {
		BOOST_THROW_EXCEPTION(invalid_argument_exception("Cannot residue_name_align() zero residue lists"));
	}

	// ...and at least one of the lists must contain something
	bool all_lists_empty = true;
	for (const residue_name_vec &the_list : arg_residue_lists) {
		if ( ! the_list.empty() ) {
			all_lists_empty = false;
			break;
		}
	}
	if ( all_lists_empty ) {
		BOOST_THROW_EXCEPTION(invalid_argument_exception("Cannot residue_name_align() residue lists that are all empty"));
	}

	// One residue_name_align_map per list, in the same order as the lists
	vector<residue_name_align_map> name_maps;
	name_maps.reserve( num_lists );
	for (const residue_name_vec &the_list : arg_residue_lists) {
		name_maps.emplace_back( make_residue_name_align_map( the_list ) );
	}

	// Per-entry cursor (index of the next residue still to be added) and the per-entry alignment rows being built
	size_vec             cursors( num_lists, 0 );
	opt_aln_posn_vec_vec alignment_rows( num_lists );

	// Each pass of this loop appends one column to the alignment
	for (;;) {
		bool   any_unfinished  = false; // Does any entry still have residues to add?
		bool   have_candidate  = false; // Has an entry been found whose next residue can be added without skipping?
		size_t candidate_entry = 0;     // If so, which entry (the longest such; earliest on ties)

		for (size_t entry_idx = 0; entry_idx < num_lists; ++entry_idx) {
			const residue_name_vec &this_list = arg_residue_lists[ entry_idx ];
			const size_t            cursor    = cursors[ entry_idx ];

			// Nothing to do for an entry that has already been fully added
			if ( cursor >= this_list.size() ) {
				continue;
			}
			any_unfinished = true;
			const residue_name &candidate_name = this_list[ cursor ];

			// Only a strictly longer entry can displace an existing candidate
			if ( have_candidate && this_list.size() <= arg_residue_lists[ candidate_entry ].size() ) {
				continue;
			}

			// For each entry, look up where (if anywhere) this residue name appears
			opt_aln_posn_vec equivalents;
			equivalents.reserve( num_lists );
			for (const residue_name_align_map &name_map : name_maps) {
				if ( contains_residue_name( name_map, candidate_name ) ) {
					equivalents.push_back( opt_aln_posn( get_index_of_residue_name( name_map, candidate_name ) ) );
				}
				else {
					equivalents.push_back( opt_aln_posn( none ) );
				}
			}

			// Adding this name is only OK if, wherever it appears, it is exactly at that entry's cursor.
			// Appearing before the cursor means the orderings conflict; appearing after means a skip.
			bool would_skip = false;
			for (size_t other_idx = 0; other_idx < num_lists; ++other_idx) {
				const opt_aln_posn &equivalent = equivalents[ other_idx ];
				if ( ! equivalent ) {
					continue;
				}
				const size_t other_cursor = cursors[ other_idx ];
				if ( *equivalent == other_cursor ) {
					continue;
				}
				if ( *equivalent < other_cursor ) {
					BOOST_THROW_EXCEPTION(
						invalid_argument_exception(
							"Whilst aligning residue names, residue " + lexical_cast<string>( candidate_name )
							+ " is out of order (in entry "           + lexical_cast<string>( entry_idx      )
							+ " and entry "                           + lexical_cast<string>( other_idx      )
							+ " around indices "                      + lexical_cast<string>( *equivalent    )
							+ " and "                                 + lexical_cast<string>( other_cursor   )
							+ ")"
						)
					);
				}
				would_skip = true;
				break;
			}

			// A name that can be added without skipping becomes the (new) candidate
			if ( ! would_skip ) {
				have_candidate  = true;
				candidate_entry = entry_idx;
			}
		}

		// All entries fully added: the alignment is complete
		if ( ! any_unfinished ) {
			break;
		}

		// Residues remain but none can be added without skipping
		if ( ! have_candidate ) {
			BOOST_THROW_EXCEPTION(invalid_argument_exception("Unable to align residue names, this probably means the residue ordering is not consistent"));
		}

		// Append a column for the chosen name: entries containing it contribute (and advance) their
		// cursor; all other entries get an empty position
		const residue_name &chosen_name = arg_residue_lists[ candidate_entry ][ cursors[ candidate_entry ] ];
		for (size_t entry_idx = 0; entry_idx < num_lists; ++entry_idx) {
			if ( contains_residue_name( name_maps[ entry_idx ], chosen_name ) ) {
				alignment_rows[ entry_idx ].push_back( opt_aln_posn( cursors[ entry_idx ] ) );
				++( cursors[ entry_idx ] );
			}
			else {
				alignment_rows[ entry_idx ].push_back( opt_aln_posn( none ) );
			}
		}
	}

	// Wrap the rows up as an alignment
	return alignment( alignment_rows );
}
			/// \brief Checks that residue_name_aligner::residue_name_align() does the correct thing for all permutations of the residue lists.
			///
			/// This should be accessed via check_residue_name_aligner_results() or check_residue_name_aligner_throws()
			///
			/// This subroutine is mostly ready to handle more than two lists.
			///
			/// \todo modify the actual call to residue_name_aligner::residue_name_align() to handle multiple lists
			///       (after that subroutine has been altered to handle multiple lists)
			void do_check_residue_name_aligner(const residue_name_vec_vec  &arg_residue_lists,
			                                   const bool_deq_vec          &arg_correct_presence_lists,
			                                   const size_vec_vec          &arg_correct_answer_lists,
			                                   const bool                  &arg_should_throw
			                                   ) {
				const size_t num_lists = arg_residue_lists.size();
				BOOST_REQUIRE_EQUAL(num_lists, 2_z); /// This code isn't yet able to process more than two at a time
				if (!arg_should_throw) {
					BOOST_REQUIRE_EQUAL(num_lists, arg_correct_presence_lists.size());
					BOOST_REQUIRE_EQUAL(num_lists, arg_correct_answer_lists.size());
				}

				// Construct a vector containing the indices of the lists
				size_vec permutation_indices(num_lists, 0);
				for (size_t index_ctr = 0; index_ctr < num_lists; ++index_ctr) {
					permutation_indices[index_ctr] = index_ctr;
				}

				// Loop over the permutations of the indices
				do {
					// If these residue lists should cause residue_name_aligner::residue_name_align() to throw then check they do
					if (arg_should_throw) {
						BOOST_CHECK_THROW(
							residue_name_aligner::residue_name_align(
								{ arg_residue_lists[ permutation_indices[ 0 ] ],
								  arg_residue_lists[ permutation_indices[ 1 ] ] }
							),
							invalid_argument_exception
						);
					}
					// Otherwise check the results from residue_name_aligner::residue_name_align()
					else {
						// Construct an alignment from this permutation of residue lists
						const alignment my_alignment = residue_name_aligner::residue_name_align(
							{ arg_residue_lists[permutation_indices[ 0 ]],
							  arg_residue_lists[permutation_indices[ 1 ]] }
						);
						const alignment::size_type num_positions = my_alignment.length();

						// Check each of the alignment entries in turn
						for (size_t index_ctr = 0; index_ctr < num_lists; ++index_ctr) {
							// Grab the correct answer list under the current permutation
							const size_t permutation_index           = permutation_indices[index_ctr];
							const bool_deq &correct_presence_list = arg_correct_presence_lists[permutation_index];
							const size_vec &correct_answer_list      = arg_correct_answer_lists[permutation_index];

							const size_t correct_answer_size         = correct_answer_list.size();

							// Check that the number of positions match
							BOOST_CHECK_EQUAL(correct_answer_size, num_positions);

							// Check that each of the positions in the alignment match what is expected
							for (size_t position_ctr  = 0; position_ctr < min(correct_answer_size, num_positions); ++position_ctr) {
								const aln_posn_opt position     = my_alignment.position_of_entry_of_index( index_ctr, position_ctr );
								const bool         has_position = static_cast<bool>( position );
								BOOST_CHECK_EQUAL( correct_presence_list[position_ctr], has_position );

								if ( position ) {
									BOOST_CHECK_EQUAL(correct_answer_list[position_ctr], *position );
								}
							}
						}
					}
				} while ( next_permutation( permutation_indices ) );
			}