Example #1
0
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Convert maligner_ix reference alignments to the common alignment format used by maligner_dp.
//
// maligner_ix handles reverse alignments by reversing the reference (since we index the reverse
// of the reference). maligner_dp handles reverse alignments by reversing the query and aligning
// it to the forward of the reference. Therefore, we must carefully convert indices and
// orientation in this function to end up with a maligner_dp::Alignment.
//
// Parameters:
//   ref_alignments - alignments to convert; each supplies its reference map, its reference
//                    chunks and its orientation.
//   query          - the query map; its interior fragments (all but the first and last) become
//                    the query chunks.
//   ref_map_db     - database used to look up the MapWrapper of each alignment's reference map
//                    by name.
//   scorer         - computes the score of each (query chunk, reference chunk) pair.
//
// Returns the converted alignments, sorted with AlignmentRescaledScoreComp and then passed
// through sift_alignments().
//
// Throws std::runtime_error if an alignment has no reference map, if the reference map is not
// present in ref_map_db, or if the number of reference chunks differs from the number of
// query chunks.
maligner_dp::AlignmentVec convert_alignments(
  const RefAlignmentVec& ref_alignments,
  const MapWrapper& query,
  MapDB& ref_map_db,
  const Scorer& scorer) {

  using maligner_dp::Score;
  using maligner_dp::MatchedChunkVec;
  using maligner_dp::ChunkVec;
  using maligner_dp::MapData;
  using maligner_dp::AlignmentRescaledScoreComp;

  maligner_dp::AlignmentVec alns;

  const MapData& query_map_data = query.map_data_;
  const Map& query_map = query.map_;

  //////////////////////////////////////////////////////////////////////
  // Our approach is to represent the query_chunks the same way that the query map would
  // be presented in maligner_dp for forward and reverse alignments.
  // This means that query_chunks_forward and query_chunks_reverse both must have increasing indices,
  // but query_chunks_reverse has the chunk sizes in the *reverse* direction.
  ChunkVec query_chunks_forward;
  ChunkVec query_chunks_reverse;
  const size_t num_query_frags = query_map.frags_.size();

  // Only interior fragments (indices 1 .. num_query_frags - 2) are converted, so a query
  // chunk can never be the first or last fragment and is never flagged as a boundary chunk.
  const bool is_boundary_chunk_query = false;

  // "i + 1 < n" instead of "i < n - 1": the latter wraps around for an empty query
  // (size_t underflow) and would index far past the end of frags_.
  for(size_t i = 1; i + 1 < num_query_frags; i++) {

    query_chunks_forward.emplace_back(i, i + 1, query_map.frags_[i], is_boundary_chunk_query);

    query_chunks_reverse.emplace_back(i, i + 1, query_map.frags_[i], is_boundary_chunk_query);
    query_chunks_reverse.back().reverse_coords(num_query_frags);

  }
  // After reverse_coords() the reverse chunks are in decreasing index order; reversing the
  // vector restores increasing indices.
  std::reverse(query_chunks_reverse.begin(), query_chunks_reverse.end());


  ///////////////////////////////////////////////////////////////////////////////////
  // In maligner_ix, ref alignments are oriented forward with respect to the query.
  // In maligner_dp, MatchedChunks must be constructed, oriented with respect to forward strand of reference.
  // This means if the alignment is reverse, we need to reverse the reference chunks,
  // and use the reverse representation of the query chunks.
  for(const ReferenceAlignment& ref_alignment : ref_alignments) {

    const Map * p_ref_map = ref_alignment.get_map();
    if (p_ref_map == nullptr) {
      throw std::runtime_error("reference alignment has no reference map.");
    }

    // Look the reference up by name; fail loudly rather than dereferencing end().
    const auto ref_iter = ref_map_db.find(p_ref_map->name_);
    if (ref_iter == ref_map_db.end()) {
      std::cerr << "reference map not found in map database: " << p_ref_map->name_ << std::endl;
      throw std::runtime_error("reference map not found in map database.");
    }
    const MapWrapper& ref = ref_iter->second;
    const MapData& ref_map_data = ref.map_data_;
    const bool ref_is_circular = ref.is_circular();

    /////////////////////////////////////////////////////////////////////////////////////////
    // Extract the reference chunks as maligner_dp chunks.
    ChunkVec ref_chunks;
    for(const MapChunk* p_chunk : ref_alignment.chunks_) {

      // A chunk is a boundary chunk only when it touches the first or last fragment of a
      // linear reference whose ends are not bounded.
      // NOTE(review): the previous condition required the reference to be circular, which
      // looks inverted (a circular map has no end fragments) - confirm against maligner_dp's
      // boundary-chunk convention.
      const bool touches_ref_end =
        (p_chunk->start_ == 0) || (p_chunk->end_ == ref.num_frags());
      const bool ref_chunk_is_boundary =
        !ref_is_circular && !opt::ref_is_bounded && touches_ref_end;

      ref_chunks.emplace_back(p_chunk->start_, p_chunk->end_, p_chunk->size_, ref_chunk_is_boundary);

    }

    // Orient the reference chunks forward if necessary
    ChunkVec * p_query_chunks = &query_chunks_forward;
    if(ref_alignment.is_reverse()) {
      std::reverse(ref_chunks.begin(), ref_chunks.end());
      p_query_chunks = &query_chunks_reverse;
    }


    // std::cerr << "query:\n\t" << *p_query_chunks << "\n"
    //           << "ref:\n\t" << ref_chunks << "\n"
    //           << "is_forward: " << ref_alignment.is_forward()
    //           << "\n";

    /////////////////////////
    // Build matched chunks
    const ChunkVec& query_chunks = *p_query_chunks;
    if (query_chunks.size() != ref_chunks.size()) {
      std::cerr << "query_frags: " << query_map.frags_.size() << " query_chunks: " << query_chunks.size() << " ref_chunks: " << ref_chunks.size() << std::endl;
      throw std::runtime_error("query chunks does not match ref_chunks size.");
    }

    // Pair the k-th query chunk with the k-th reference chunk and accumulate the total score.
    MatchedChunkVec matched_chunks;
    Score total_score;
    const size_t num_matched_chunks = query_chunks.size();
    for(size_t k = 0; k < num_matched_chunks; k++) {
      Score score = scorer.compute_score(query_chunks[k], ref_chunks[k]);
      total_score += score;
      matched_chunks.emplace_back(query_chunks[k], ref_chunks[k], score);
    }

    /////////////////////////
    // Construct Alignment
    const bool aln_is_forward = ref_alignment.is_forward();
    maligner_dp::Alignment aln(matched_chunks, total_score, query_map_data, ref_map_data, aln_is_forward);
    if (!aln_is_forward) aln.flip_query_coords();
    aln.add_alignment_locs(query.ix_to_locs_, ref.ix_to_locs_);
    alns.push_back(std::move(aln));

  }

  // Sort the alignments by total rescaled score
  std::sort(alns.begin(), alns.end(), AlignmentRescaledScoreComp());

  alns = sift_alignments(alns, ref_map_db);

  return alns;

}