void Caller::call_base_pileup(const NodePileup& np, int64_t offset) { const BasePileup& bp = np.base_pileup(offset); // parse the pilueup structure vector<pair<int, int> > base_offsets; vector<pair<int, int> > indel_offsets; Pileups::parse_base_offsets(bp, base_offsets, indel_offsets, indel_offsets); // compute top two most frequent bases and their counts char top_base; int top_count; char second_base; int second_count; compute_top_frequencies(bp, base_offsets, top_base, top_count, second_base, second_count); // note first and second base will be upper case too char ref_base = ::toupper(bp.ref_base()); // test against thresholding heuristics if ((double)(top_count + second_count) / (double)base_offsets.size() >= _min_frac) { // compute max likelihood snp genotype. it will be one of the three combinations // of the top two bases (we don't care about case here) pair<char, char> g = mp_snp_genotype(bp, base_offsets, top_base, second_base); // update the node calls if (top_count >= _min_support) { if (g.first != ref_base) { _node_calls[offset].first = g.first; } else { _node_calls[offset].first = '.'; } } if (second_count >= _min_support) { if (g.second != ref_base && g.second != g.first) { _node_calls[offset].second = g.second; } else { _node_calls[offset].second = '.'; } } } }
void Caller::call_node_pileup(const NodePileup& pileup) { _node = _graph->get_node(pileup.node_id()); assert(_node != NULL); _node_calls.clear(); char def_char = _leave_uncalled ? '.' : '-'; _node_calls.assign(_node->sequence().length(), Genotype(def_char, def_char)); // todo: parallelize this loop // process each base in pileup individually for (int i = 0; i < pileup.base_pileup_size(); ++i) { if (pileup.base_pileup(i).num_bases() >= _min_depth && pileup.base_pileup(i).num_bases() <= _max_depth) { call_base_pileup(pileup, i); } } // add nodes and edges created when making calls to the output graph // (_side_map gets updated) create_node_calls(pileup); _visited_nodes.insert(_node->id()); }