// Re-synchronises vertices_, the underlying sdbg_ and start_node_map_ after
// vertices have been flagged is_dead.
//
// The work is done in four passes over vertices_, each an OpenMP parallel for:
//   1. For every vertex that is is_dead but not yet is_deleted, call
//      sdbg_->SetInvalid() on every node between end_node and start_node (and
//      between rev_end_node and rev_start_node when the two ends differ), then
//      set is_deleted.
//   2. Linear paths: starting from a vertex that has no previous simple-path
//      node on one of its two strands, follow NextSimplePathNode() and fold
//      every vertex reached into vertex i.
//   3. Loops: any vertex that is still not deleted and whose bit in `marked`
//      is not set after pass 2 is merged with the vertices reachable from it,
//      serialised by reassemble_lock.
//   4. Store rev_start_node -> index in start_node_map_ for every vertex that
//      is not deleted.
void UnitigGraph::Refresh_() {
    omp_lock_t reassemble_lock;
    omp_init_lock(&reassemble_lock);
    // Function-local static: the same object is reused by every call and is
    // reset to the current vertex count here.
    // NOTE(review): presumably reset() also clears all bits - confirm in AtomicBitVector.
    static AtomicBitVector marked;
    marked.reset(vertices_.size());

    // update the sdbg
    #pragma omp parallel for
    for (vertexID_t i = 0; i < vertices_.size(); ++i) {
        if (vertices_[i].is_dead && !vertices_[i].is_deleted) {
            // Walk backwards from end_node to start_node, invalidating each node on the way.
            int64_t cur_node = vertices_[i].end_node;
            while (cur_node != vertices_[i].start_node) {
                sdbg_->SetInvalid(cur_node);
                cur_node = sdbg_->UniqueIncoming(cur_node);
                assert(cur_node != -1);
                cur_node = sdbg_->GetLastIndex(cur_node);
            }
            // The loop stops before touching start_node; invalidate it too.
            sdbg_->SetInvalid(cur_node);

            // Same walk over the rev_* pair, skipped when both strands share the same end node.
            if (vertices_[i].rev_end_node != vertices_[i].end_node) {
                cur_node = vertices_[i].rev_end_node;
                while (cur_node != vertices_[i].rev_start_node) {
                    sdbg_->SetInvalid(cur_node);
                    cur_node = sdbg_->UniqueIncoming(cur_node);
                    assert(cur_node != -1);
                    cur_node = sdbg_->GetLastIndex(cur_node);
                }
                sdbg_->SetInvalid(cur_node);
            }
            vertices_[i].is_deleted = true;
        }
    }

    // Pass 2: merge linear chains of vertices.
    #pragma omp parallel for
    for (vertexID_t i = 0; i < vertices_.size(); ++i) {
        if (vertices_[i].is_deleted) {
            continue;
        }
        // dir == 0: nothing precedes start_node, extend from end_node.
        // dir == 1: nothing precedes rev_start_node, extend from rev_end_node.
        // Vertices with a predecessor on both strands are not chain heads and are skipped here.
        int dir;
        if (assembly_algorithms::PrevSimplePathNode(*sdbg_, vertices_[i].start_node) == -1) {
            dir = 0;
        } else if (assembly_algorithms::PrevSimplePathNode(*sdbg_, vertices_[i].rev_start_node) == -1) {
            dir = 1;
        } else {
            continue;
        }

        // NOTE(review): lock() presumably test-and-sets bit i and returns false when it was
        // already set (i.e. another thread owns this vertex) - confirm in AtomicBitVector.
        if (!marked.lock(i)) {
            continue;
        }

        std::vector<std::pair<vertexID_t, bool> > linear_path; // first: vertex_id, second: is_rc

        int64_t cur_end = dir == 0 ? vertices_[i].end_node : vertices_[i].rev_end_node;
        int64_t new_start = dir == 0 ? vertices_[i].start_node : vertices_[i].rev_start_node;
        int64_t new_rc_end = dir == 0 ? vertices_[i].rev_end_node : vertices_[i].end_node;

        // Collect the successive vertices until the simple path ends.
        while (true) {
            int64_t next_start = assembly_algorithms::NextSimplePathNode(*sdbg_, cur_end);
            if (next_start == -1) {
                break;
            }
            auto next_vertex_iter = start_node_map_.find(next_start);
            assert(next_vertex_iter != start_node_map_.end());
            UnitigGraphVertex &next_vertex = vertices_[next_vertex_iter->second];
            assert(!next_vertex.is_deleted);
            // The vertex was reached through a node other than its start_node, so it is
            // traversed on its rev_* strand.
            bool is_rc = next_vertex.start_node != next_start;
            linear_path.push_back(std::make_pair(next_vertex_iter->second, is_rc));

            cur_end = is_rc ? next_vertex.rev_end_node : next_vertex.end_node;
        }

        // Nothing to merge. Bit i stays set in `marked`, so pass 3 will not treat this vertex as a loop.
        if (linear_path.empty()) {
            continue;
        }

        // Also claim the vertex at the far end of the path.
        if (i != linear_path.back().first && !marked.lock(linear_path.back().first)) {
            // if i == linear_path.back().first it is a palindrome self loop
            if (linear_path.back().first > i) {
                // The far end has the larger index: give up our own bit and skip this path.
                // NOTE(review): presumably the thread holding the far end assembles the same
                // path from the other side, and the index ordering prevents both threads
                // from backing off or both from waiting - confirm.
                marked.unset(i);
                continue;
            } else {
                while (!marked.lock(linear_path.back().first)) {
                    // busy-wait until the other thread releases the lock
                }
            }
        }

        // assemble the linear path
        int64_t depth = vertices_[i].depth;
        int64_t length = vertices_[i].length;

        // Sum length/depth over the absorbed vertices and flag each of them deleted.
        for (unsigned j = 0; j < linear_path.size(); ++j) {
            UnitigGraphVertex &next_vertex = vertices_[linear_path[j].first];
            length += next_vertex.length;
            depth += next_vertex.depth;
            next_vertex.is_deleted = true;
        }

        vertices_[i].length = length;
        vertices_[i].depth = depth;
        // The merged vertex ends where the last path element ends, on the strand it was traversed on.
        int64_t new_end;
        int64_t new_rc_start;
        if (linear_path.back().second) {
            new_end = vertices_[linear_path.back().first].rev_end_node;
            new_rc_start = vertices_[linear_path.back().first].start_node;
        } else {
            new_end = vertices_[linear_path.back().first].end_node;
            new_rc_start = vertices_[linear_path.back().first].rev_start_node;
        }

        vertices_[i].start_node = new_start;
        vertices_[i].end_node = new_end;
        vertices_[i].rev_start_node = new_rc_start;
        vertices_[i].rev_end_node = new_rc_end;
        vertices_[i].is_changed = true;
        // When the path ends on vertex i itself, the loop above flagged i as deleted; undo that.
        if (i == linear_path.back().first) {
            vertices_[i].is_deleted = false;
        }
    }

    // looped path
    #pragma omp parallel for
    for (vertexID_t i = 0; i < vertices_.size(); ++i) {
        if (!vertices_[i].is_deleted && !marked.get(i)) {
            // All loop merging runs under one lock; the condition is re-checked after
            // acquiring it because another thread may have absorbed vertex i meanwhile.
            omp_set_lock(&reassemble_lock);
            if (!vertices_[i].is_deleted && !marked.get(i)) {
                // NOTE(review): length is accumulated in uint32_t here but in int64_t in the
                // linear pass above - confirm this matches the type of the length field.
                uint32_t length = vertices_[i].length;
                int64_t depth = vertices_[i].depth;

                vertices_[i].is_changed = true;
                vertices_[i].is_loop = true;
                // NOTE(review): is_deleted is set here and is not cleared again anywhere in
                // this function, so the final pass below skips loop vertices - confirm
                // that leaving merged loops flagged as deleted is intended.
                vertices_[i].is_deleted = true;
                bool is_palindrome = false;

                int64_t cur_end = vertices_[i].end_node;
                // Follow the path until it returns to this vertex's start_node.
                while (true) {
                    int64_t next_start = assembly_algorithms::NextSimplePathNode(*sdbg_, cur_end);
                    assert(next_start != -1);
                    if (next_start == vertices_[i].start_node) {
                        break;
                    }
                    auto next_vertex_iter = start_node_map_.find(next_start);
                    assert(next_vertex_iter != start_node_map_.end());
                    UnitigGraphVertex &next_vertex = vertices_[next_vertex_iter->second];
                    if (next_vertex.is_deleted) {
                        // the loop has already gone through this vertex's reverse complement
                        is_palindrome = true;
                    }
                    length += next_vertex.length;
                    depth += next_vertex.depth;
                    next_vertex.is_deleted = true;

                    cur_end = (next_vertex.start_node == next_start) ? next_vertex.end_node : next_vertex.rev_end_node;
                }

                vertices_[i].depth = depth;
                vertices_[i].length = length;
                vertices_[i].is_palindrome = is_palindrome;
                // The new end is the node that precedes start_node on the loop; the rev_*
                // nodes are derived from the new ends through ReverseComplement().
                vertices_[i].end_node = sdbg_->GetLastIndex(assembly_algorithms::PrevSimplePathNode(*sdbg_, vertices_[i].start_node));
                vertices_[i].rev_start_node = sdbg_->ReverseComplement(vertices_[i].end_node);
                vertices_[i].rev_end_node = sdbg_->ReverseComplement(vertices_[i].start_node);
            }
            omp_unset_lock(&reassemble_lock);
        }
    }

    // Register the (possibly new) rev_start_node of every surviving vertex.
    // NOTE(review): start_node_map_ is written from several threads here - this assumes
    // its operator[] is safe for concurrent use; confirm against the map's implementation.
    #pragma omp parallel for
    for (vertexID_t i = 0; i < vertices_.size(); ++i) {
        if (!vertices_[i].is_deleted) {
            start_node_map_[vertices_[i].rev_start_node] = i;
        }
    }

    omp_destroy_lock(&reassemble_lock);
}
int64_t Trim(SuccinctDBG &dbg, int len, int min_final_contig_len) { int64_t number_tips = 0; omp_lock_t path_lock; omp_init_lock(&path_lock); marked.reset(dbg.size); #pragma omp parallel for reduction(+:number_tips) for (int64_t node_idx = 0; node_idx < dbg.size; ++node_idx) { if (dbg.IsValidNode(node_idx) && !marked.get(node_idx) && dbg.IsLast(node_idx) && dbg.OutdegreeZero(node_idx)) { vector<int64_t> path = {node_idx}; int64_t prev_node; int64_t cur_node = node_idx; bool is_tip = false; for (int i = 1; i < len; ++i) { prev_node = dbg.UniqueIncoming(cur_node); if (prev_node == -1) { is_tip = dbg.IndegreeZero(cur_node) && (i + dbg.kmer_k - 1 < min_final_contig_len); break; } else if (dbg.UniqueOutgoing(prev_node) == -1) { is_tip = true; break; } else { path.push_back(prev_node); cur_node = prev_node; } } if (is_tip) { for (unsigned i = 0; i < path.size(); ++i) { MarkNode(dbg, path[i]); } ++number_tips; } } } #pragma omp parallel for reduction(+:number_tips) for (int64_t node_idx = 0; node_idx < dbg.size; ++node_idx) { if (dbg.IsValidNode(node_idx) && dbg.IsLast(node_idx) && !marked.get(node_idx) && dbg.IndegreeZero(node_idx)) { vector<int64_t> path = {node_idx}; int64_t next_node; int64_t cur_node = node_idx; bool is_tip = false; for (int i = 1; i < len; ++i) { next_node = dbg.UniqueOutgoing(cur_node); if (next_node == -1) { is_tip = dbg.OutdegreeZero(cur_node) && (i + dbg.kmer_k - 1 < min_final_contig_len); break; } else if (dbg.UniqueIncoming(next_node) == -1) { is_tip = true; } else { path.push_back(next_node); cur_node = next_node; } } if (is_tip) { for (unsigned i = 0; i < path.size(); ++i) { MarkNode(dbg, path[i]); } ++number_tips; } } } #pragma omp parallel for for (int64_t node_idx = 0; node_idx < dbg.size; ++node_idx) { if (marked.get(node_idx)) { dbg.SetInvalid(node_idx); } } return number_tips; }