void Graph::insertSequenceAlignment(const Alignment& alignment, const string& seq, const string& label) { const string& aln_sequence = alignment.sequence(); const deque<int>& seq_idxs = alignment.seq_idxs(); const deque<int>& node_ids = alignment.node_ids(); int first_id = -1; int head_id = -1; int tail_id = -1; pair<uint32_t, uint32_t> prefix_end_ids; pair<uint32_t, uint32_t> suffix_end_ids; // because of local alignment prefix or sufix of sequence can be unaligned // so we add it directly to graph deque<uint32_t> valid_seq_idxs; for (auto idx : seq_idxs) { if (idx != -1) { valid_seq_idxs.emplace_back(idx); } } uint32_t aln_seq_start_idx = valid_seq_idxs.front(); uint32_t aln_seq_end_idx = valid_seq_idxs.back(); if (aln_seq_start_idx > 0) { prefix_end_ids = addUnmatchedSequence( aln_sequence.substr(0, aln_seq_start_idx), label, false); first_id = prefix_end_ids.first; head_id = prefix_end_ids.second; } if (aln_seq_end_idx < aln_sequence.length()) { suffix_end_ids = addUnmatchedSequence( aln_sequence.substr(aln_seq_end_idx + 1), label, false); tail_id = suffix_end_ids.first; } // aligned part of sequence uint32_t size = max(seq_idxs.size(), node_ids.size()); for (uint32_t i = 0; i < size; ++i) { auto& seq_idx = seq_idxs[i]; auto& match_id = node_ids[i]; if (seq_idx == -1) { continue; } int node_id = -1; char base = aln_sequence[seq_idx]; if (match_id == -1) { // if sequence base unmatched with graph node add new node addNode(base); node_id = next_id_ - 1; } else if (nodes_[match_id]->base() == base) { // if sequence base matched to a node with same base node_id = match_id; } else { // if sequence base matched to a node with different base // which is aligned to a node with same base int found_node_id = -1; for (auto id : nodes_[match_id]->getAlignedIds()) { if (nodes_[id]->base() == base) { found_node_id = id; break; } } if (found_node_id == -1) { // we didn't find aligned node with same base addNode(base); node_id = next_id_ - 1; // add all aligned to nodes to newly created node for (auto id : nodes_[match_id]->getAlignedIds()) { nodes_[node_id]->addAlignedNode(id); } nodes_[node_id]->addAlignedNode(match_id); // to nodes aligned to newly created node add this node // as aligned to for (auto id : nodes_[node_id]->getAlignedIds()) { nodes_[id]->addAlignedNode(node_id); } } else { // node id is found node id node_id = found_node_id; } } if (head_id != -1 && node_id != -1) { addEdge(head_id, node_id, label); } head_id = node_id; if (first_id == -1) { first_id = head_id; } } // connect aligned part with unaligned suffix if (head_id != -1 && tail_id != -1) { addEdge(head_id, tail_id, label); } // resort nodes order topological_sort(); sequences_.emplace_back(seq); labels_.emplace_back(label); start_ids_.emplace_back(first_id); }