Example #1
0
/**
 * Rebuilds the unitig graph after vertices have been flagged `is_dead`.
 *
 * Runs four OpenMP-parallel passes over `vertices_`:
 *   1. For every vertex that is dead but not yet deleted, invalidate each SdBG
 *      node on its path (and on its rev_* path when rev_end_node differs from
 *      end_node), then flag the vertex `is_deleted`.
 *   2. For every surviving vertex for which PrevSimplePathNode() returns -1 on
 *      start_node or rev_start_node, walk forward with NextSimplePathNode()
 *      and fold the vertices found into vertex i: lengths and depths are
 *      summed and the absorbed vertices are flagged `is_deleted`.
 *   3. Every surviving vertex that pass 2 did not mark is handled as a loop
 *      and collapsed while holding `reassemble_lock`.
 *   4. `start_node_map_` receives an entry rev_start_node -> i for every
 *      surviving vertex.
 */
void UnitigGraph::Refresh_() {
    // Serialises the loop handling in pass 3.
    omp_lock_t reassemble_lock;
    omp_init_lock(&reassemble_lock);
    // One bit per vertex, used as a per-vertex claim flag in pass 2 and read
    // in pass 3. Being a function-local static, the object is shared by every
    // call of Refresh_(); it is reset to the current vertex count here.
    // NOTE(review): this presumably rules out concurrent calls of Refresh_() — confirm.
    static AtomicBitVector marked;
    marked.reset(vertices_.size());

    // update the sdbg
#pragma omp parallel for
    for (vertexID_t i = 0; i < vertices_.size(); ++i) {
        if (vertices_[i].is_dead && !vertices_[i].is_deleted) {
            // Walk backwards from end_node to start_node, invalidating every
            // node visited; start_node itself is invalidated after the loop.
            int64_t cur_node = vertices_[i].end_node;
            while (cur_node != vertices_[i].start_node) {
                sdbg_->SetInvalid(cur_node);
                cur_node = sdbg_->UniqueIncoming(cur_node);
                assert(cur_node != -1);
                cur_node = sdbg_->GetLastIndex(cur_node);
            }
            sdbg_->SetInvalid(cur_node);

            // Same walk on the rev_* path, skipped when it ends on the same
            // node as the path handled above.
            if (vertices_[i].rev_end_node != vertices_[i].end_node) {
                cur_node = vertices_[i].rev_end_node;
                while (cur_node != vertices_[i].rev_start_node) {
                    sdbg_->SetInvalid(cur_node);
                    cur_node = sdbg_->UniqueIncoming(cur_node);
                    assert(cur_node != -1);
                    cur_node = sdbg_->GetLastIndex(cur_node);
                }
                sdbg_->SetInvalid(cur_node);
            }

            vertices_[i].is_deleted = true;
        }
    }

    // Merge linear chains of vertices into their first vertex.
#pragma omp parallel for
    for (vertexID_t i = 0; i < vertices_.size(); ++i) {
        if (vertices_[i].is_deleted) { continue; }
        // dir == 0: extend from start_node towards end_node;
        // dir == 1: extend from rev_start_node towards rev_end_node.
        // A vertex qualifies only if PrevSimplePathNode() returns -1 for the
        // chosen start (presumably: it has no simple-path predecessor there).
        int dir;
        if (assembly_algorithms::PrevSimplePathNode(*sdbg_, vertices_[i].start_node) == -1) {
            dir = 0;
        } else if (assembly_algorithms::PrevSimplePathNode(*sdbg_, vertices_[i].rev_start_node) == -1) {
            dir = 1;
        } else {
            continue;
        }

        // Claim vertex i; a false return is treated as "claimed by someone
        // else" (presumably the bit was already set — confirm in AtomicBitVector).
        if (!marked.lock(i)) { continue; }

        std::vector<std::pair<vertexID_t, bool> > linear_path; // first: vertex_id, second: is_rc
        int64_t cur_end = dir == 0 ? vertices_[i].end_node : vertices_[i].rev_end_node;
        int64_t new_start = dir == 0 ? vertices_[i].start_node : vertices_[i].rev_start_node;
        int64_t new_rc_end = dir == 0 ? vertices_[i].rev_end_node : vertices_[i].end_node;

        // Collect successor vertices until NextSimplePathNode() returns -1.
        while (true) {
            int64_t next_start = assembly_algorithms::NextSimplePathNode(*sdbg_, cur_end);
            if (next_start == -1) {
                break;
            }

            auto next_vertex_iter = start_node_map_.find(next_start);
            assert(next_vertex_iter != start_node_map_.end());
            UnitigGraphVertex &next_vertex = vertices_[next_vertex_iter->second];
            assert(!next_vertex.is_deleted);

            // The successor was entered through its rev_* side when the node
            // reached is not its start_node.
            bool is_rc = next_vertex.start_node != next_start;
            linear_path.push_back(std::make_pair(next_vertex_iter->second, is_rc));

            cur_end = is_rc ? next_vertex.rev_end_node : next_vertex.end_node;
        }

        // Nothing to merge. Vertex i stays marked, so pass 3 will skip it.
        if (linear_path.empty()) { continue; }

        // The last vertex of the chain must be claimed as well. If the claim
        // fails, another thread holds it (presumably the thread walking the
        // same chain from its other end). The tie is broken by vertex id: the
        // smaller id releases its own claim and gives up, the larger id spins
        // until that release happens and then does the merge.
        if (i != linear_path.back().first && !marked.lock(linear_path.back().first)) { // if i == linear_path.back().first it is a palindrome self loop
            if (linear_path.back().first > i) {
                marked.unset(i);
                continue;
            } else {
                while (!marked.lock(linear_path.back().first)) {
                    // busy-wait until the other thread releases the lock
                }
            }
        }

        // assemble the linear path

        int64_t depth = vertices_[i].depth;
        int64_t length = vertices_[i].length;

        // Accumulate length/depth of every absorbed vertex and retire it.
        for (unsigned j = 0; j < linear_path.size(); ++j) {
            UnitigGraphVertex &next_vertex = vertices_[linear_path[j].first];
            length += next_vertex.length;
            depth += next_vertex.depth;
            next_vertex.is_deleted = true;
        }

        vertices_[i].length = length;
        vertices_[i].depth = depth;

        // The merged vertex ends where the last absorbed vertex ends, taking
        // into account the orientation in which that vertex was traversed.
        int64_t new_end;
        int64_t new_rc_start;
        if (linear_path.back().second) {
            new_end = vertices_[linear_path.back().first].rev_end_node;
            new_rc_start = vertices_[linear_path.back().first].start_node;
        } else {
            new_end = vertices_[linear_path.back().first].end_node;
            new_rc_start = vertices_[linear_path.back().first].rev_start_node;
        }

        vertices_[i].start_node = new_start;
        vertices_[i].end_node = new_end;
        vertices_[i].rev_start_node = new_rc_start;
        vertices_[i].rev_end_node = new_rc_end;
        vertices_[i].is_changed = true;
        // When the chain ends on vertex i itself, the loop above flagged i as
        // deleted; undo that so the merged vertex survives.
        if (i == linear_path.back().first) {
            vertices_[i].is_deleted = false;
        }
    }

    // looped path
#pragma omp parallel for
    for (vertexID_t i = 0; i < vertices_.size(); ++i) {
        // Unsynchronised pre-check, repeated below once the lock is held
        // because another thread may have absorbed vertex i in the meantime.
        if (!vertices_[i].is_deleted && !marked.get(i)) {
            omp_set_lock(&reassemble_lock);
            if (!vertices_[i].is_deleted && !marked.get(i)) {
                uint32_t length = vertices_[i].length;
                int64_t depth = vertices_[i].depth;

                vertices_[i].is_changed = true;
                vertices_[i].is_loop = true;
                vertices_[i].is_deleted = true;
                bool is_palindrome = false;

                // Follow successors until the walk returns to start_node,
                // absorbing every vertex met on the way.
                int64_t cur_end = vertices_[i].end_node;
                while (true) {
                    int64_t next_start = assembly_algorithms::NextSimplePathNode(*sdbg_, cur_end);
                    assert(next_start != -1);
                    if (next_start == vertices_[i].start_node) {
                        break;
                    }

                    auto next_vertex_iter = start_node_map_.find(next_start);
                    assert(next_vertex_iter != start_node_map_.end());
                    UnitigGraphVertex &next_vertex = vertices_[next_vertex_iter->second];

                    if (next_vertex.is_deleted) {
                        // that means the loop has already gone through its rc
                        is_palindrome = true;
                    }

                    length += next_vertex.length;
                    depth += next_vertex.depth;
                    next_vertex.is_deleted = true;

                    cur_end = (next_vertex.start_node == next_start) ? next_vertex.end_node : next_vertex.rev_end_node;
                }

                // NOTE(review): vertices_[i].is_deleted is set to true above and
                // is not reset in this block, so pass 4 will not re-register
                // loop vertices — confirm this is handled by the caller.
                vertices_[i].depth = depth;
                vertices_[i].length = length;
                vertices_[i].is_palindrome = is_palindrome;
                // Close the loop: the new end is the node preceding start_node;
                // the rev_* ends are derived through ReverseComplement().
                vertices_[i].end_node = sdbg_->GetLastIndex(assembly_algorithms::PrevSimplePathNode(*sdbg_, vertices_[i].start_node));
                vertices_[i].rev_start_node = sdbg_->ReverseComplement(vertices_[i].end_node);
                vertices_[i].rev_end_node = sdbg_->ReverseComplement(vertices_[i].start_node);
            }
            omp_unset_lock(&reassemble_lock);
        }
    }

    // Register the (possibly changed) rev_start_node of every surviving vertex.
    // NOTE(review): operator[] is called on start_node_map_ from several
    // threads at once; this is only safe if the map type supports concurrent
    // insertion — confirm.
#pragma omp parallel for
    for (vertexID_t i = 0; i < vertices_.size(); ++i) {
        if (!vertices_[i].is_deleted) {
            start_node_map_[vertices_[i].rev_start_node] = i;
        }
    }

    omp_destroy_lock(&reassemble_lock);
}
Example #2
0
/**
 * Finds short dead-end paths ("tips") in the graph and invalidates their nodes.
 *
 * Three parallel passes over all node indices:
 *   1. From every valid, unmarked, last node with out-degree zero, walk
 *      backwards along unique incoming edges for fewer than `len` steps.
 *   2. From every valid, unmarked, last node with in-degree zero, walk
 *      forwards along unique outgoing edges for fewer than `len` steps.
 *   3. Call SetInvalid() on every node marked by passes 1 and 2.
 *
 * A walk is a tip when it reaches a node whose neighbour in the walking
 * direction has no unique edge back, or when it ends on a node with no edge at
 * all in the walking direction while the path is shorter than
 * `min_final_contig_len`. A walk that is still going after the step limit is
 * not a tip.
 *
 * Uses the file-level `marked` bit vector, which is reset on entry.
 *
 * @param dbg                  graph to trim; nodes are invalidated in place
 * @param len                  step limit: each walk collects at most `len` nodes
 * @param min_final_contig_len length bound (compared against
 *                             steps + kmer_k - 1) below which an isolated path
 *                             is removed as a tip
 * @return number of tips found
 */
int64_t Trim(SuccinctDBG &dbg, int len, int min_final_contig_len) {
    int64_t number_tips = 0;
    marked.reset(dbg.size);

    // Pass 1: tips that end in a node without outgoing edges, walked backwards.
#pragma omp parallel for reduction(+:number_tips)
    for (int64_t node_idx = 0; node_idx < dbg.size; ++node_idx) {
        if (dbg.IsValidNode(node_idx) && !marked.get(node_idx) && dbg.IsLast(node_idx) && dbg.OutdegreeZero(node_idx)) {
            vector<int64_t> path = {node_idx};
            int64_t prev_node;
            int64_t cur_node = node_idx;
            bool is_tip = false;
            for (int i = 1; i < len; ++i) {
                prev_node = dbg.UniqueIncoming(cur_node);
                if (prev_node == -1) {
                    // No unique predecessor: only an isolated, short path counts.
                    is_tip = dbg.IndegreeZero(cur_node) && (i + dbg.kmer_k - 1 < min_final_contig_len);
                    break;
                } else if (dbg.UniqueOutgoing(prev_node) == -1) {
                    // The predecessor has no unique outgoing edge, so the path
                    // collected so far hangs off it as a tip.
                    is_tip = true;
                    break;
                } else {
                    path.push_back(prev_node);
                    cur_node = prev_node;
                }
            }

            if (is_tip) {
                for (int64_t tip_node : path) {
                    MarkNode(dbg, tip_node);
                }
                ++number_tips;
            }
        }
    }

    // Pass 2: tips that start in a node without incoming edges, walked forwards.
#pragma omp parallel for reduction(+:number_tips)
    for (int64_t node_idx = 0; node_idx < dbg.size; ++node_idx) {
        if (dbg.IsValidNode(node_idx) && dbg.IsLast(node_idx) && !marked.get(node_idx) && dbg.IndegreeZero(node_idx)) {
            vector<int64_t> path = {node_idx};
            int64_t next_node;
            int64_t cur_node = node_idx;
            bool is_tip = false;
            for (int i = 1; i < len; ++i) {
                next_node = dbg.UniqueOutgoing(cur_node);
                if (next_node == -1) {
                    // No unique successor: only an isolated, short path counts.
                    is_tip = dbg.OutdegreeZero(cur_node) && (i + dbg.kmer_k - 1 < min_final_contig_len);
                    break;
                } else if (dbg.UniqueIncoming(next_node) == -1) {
                    // The successor has no unique incoming edge, so the path
                    // collected so far is a tip. Stop here: cur_node does not
                    // change, so further iterations would only repeat the same
                    // queries and reach the same verdict.
                    is_tip = true;
                    break;
                } else {
                    path.push_back(next_node);
                    cur_node = next_node;
                }
            }

            if (is_tip) {
                for (int64_t tip_node : path) {
                    MarkNode(dbg, tip_node);
                }
                ++number_tips;
            }
        }
    }

    // Pass 3: apply the marks. Done separately so that passes 1 and 2 see an
    // unchanged graph while they walk it.
#pragma omp parallel for
    for (int64_t node_idx = 0; node_idx < dbg.size; ++node_idx) {
        if (marked.get(node_idx)) {
            dbg.SetInvalid(node_idx);
        }
    }

    return number_tips;
}