/**
 * Builds the merged (mate 1 followed by mate 2) sequence for every barcode
 * pair in `barcodes` and returns these in lexicographically sorted order.
 * Each merged sequence is paired with the 0-based position of the barcode
 * pair it was built from in the source vector.
 *
 * Throws barcode_error if any mate 1 (or mate 2) barcode differs in length
 * from the mate 1 (or mate 2) barcode of the first entry.
 */
barcode_vec
sort_barcodes(const fastq_pair_vec& barcodes)
{
  AR_DEBUG_ASSERT(!barcodes.empty());

  // The first entry defines the expected length of each mate's barcode
  const auto& reference = barcodes.front();
  const size_t mate_1_len = reference.first.length();
  const size_t mate_2_len = reference.second.length();

  barcode_vec merged;
  size_t source_idx = 0;

  for (const auto& entry : barcodes) {
    if (entry.first.length() != mate_1_len) {
      throw barcode_error("mate 1 barcodes do not have the same length");
    }

    if (entry.second.length() != mate_2_len) {
      throw barcode_error("mate 2 barcodes do not have the same length");
    }

    // Concatenate both mates into a single key
    std::string key;
    key.reserve(mate_1_len + mate_2_len);
    key.append(entry.first.sequence());
    key.append(entry.second.sequence());

    merged.push_back(barcode_pair(key, source_idx));
    ++source_idx;
  }

  std::sort(merged.begin(), merged.end());

  return merged;
}
/*
 * Tries sigma values at `step_size` increments on either side of the current
 * sim->sigma (from -steps to +steps, skipping the current value itself) and
 * records the one giving the lowest barcode_error(). Candidates below 1.0 are
 * not evaluated. The final sigma is a blend of the best candidate and the
 * starting value, weighted by `factor`, after which the kernel and the
 * blurred signal are recomputed for that final sigma.
 */
void adjust_sigma(Simulator *sim, int steps, double step_size, double factor) {
    double start_sigma = sim->sigma;
    double winning_sigma = sim->sigma;
    double lowest_error = barcode_error(sim);

    for (int offset = -steps; offset <= steps; offset++) {
        if (offset != 0) {
            sim->sigma = start_sigma + offset * step_size;

            /* Only evaluate candidates that are not below 1.0 */
            if (!(sim->sigma < 1.0)) {
                compute_kernel(sim);
                convolve_1d(sim->kernel_width, sim->kernel, sim->width,
                            sim->guess, sim->blur, 0, sim->width);

                double candidate_error = barcode_error(sim);
                /* debug: printf("sigma=%.2f error=%.2f\n", sim->sigma, candidate_error); */
                if (candidate_error < lowest_error) {
                    lowest_error = candidate_error;
                    winning_sigma = sim->sigma;
                }
            }
        }
    }

    /* Move only part of the way towards the best candidate */
    sim->sigma = winning_sigma * factor + start_sigma * (1.0 - factor);

    /* Bring the kernel and blurred signal in sync with the final sigma */
    compute_kernel(sim);
    convolve_1d(sim->kernel_width, sim->kernel, sim->width,
                sim->guess, sim->blur, 0, sim->width);
}
/**
 * Inserts a nucleotide sequence with the given barcode ID into a quad-tree
 * stored as a flat vector of nodes, starting from the node at index 0.
 *
 * Throws barcode_error if walking the sequence did not end by creating a new
 * node, which is reported as a duplicate barcode (pair).
 */
void
add_sequence_to_tree(demux_node_vec& tree,
                     const std::string& sequence,
                     const size_t barcode_id)
{
  size_t cursor = 0;
  bool created_leaf = false;

  for (auto nuc : sequence) {
    // NOTE: this reference must not be used after the push_back below, since
    // growing the vector may invalidate it.
    auto& current = tree.at(cursor);

    // Indicate when PE barcodes can be unambiguously identified from SE
    // reads: a node seen for the first time records this barcode, while a
    // node already claimed is flagged as ambiguous.
    const bool unclaimed = (current.value == barcode_table::no_match);
    current.value = unclaimed ? barcode_id : barcode_table::ambigious;

    const auto slot = ACGT_TO_IDX(nuc);
    auto next = current.children[slot];

    created_leaf = (next == barcode_table::no_match);
    if (created_leaf) {
      // New nodes go at the end of the vector; since barcodes are added in
      // lexicographic order, similar barcodes tend to end up in mostly
      // contiguous runs of the vector representation.
      current.children[slot] = tree.size();
      next = current.children[slot];
      tree.push_back(demultiplexer_node());
    }

    cursor = next;
  }

  if (!created_leaf) {
    throw barcode_error(std::string("duplicate barcode (pair): ") + sequence);
  }

  // The terminal node always carries the ID of the barcode ending there
  tree.at(cursor).value = barcode_id;
}