/**
 * Returns a lexicographically sorted list of merged barcodes, each paired with
 * the 0-based index of the corresponding barcode in the source vector.
 *
 * A merged barcode is the mate 1 sequence immediately followed by the mate 2
 * sequence of the same entry. The result is ordered with std::sort using the
 * default comparison of barcode_pair.
 *
 * @param barcodes Pairs of (mate 1, mate 2) barcodes; expected to be non-empty.
 * @return One (merged barcode, source index) entry per input pair, sorted.
 * @throws barcode_error if the mate 1 barcodes are not all the same length as
 *         the first mate 1 barcode, or likewise for the mate 2 barcodes.
 */
barcode_vec sort_barcodes(const fastq_pair_vec& barcodes)
{
    AR_DEBUG_ASSERT(!barcodes.empty());
    barcode_vec sorted_barcodes;

    // Defensive guard: should the assertion above be compiled out (e.g. in a
    // non-debug build), calling front() on an empty vector would be undefined
    // behavior; return an empty result instead.
    if (barcodes.empty()) {
        return sorted_barcodes;
    }

    // Every barcode must match the lengths of the first pair.
    const size_t key_1_len = barcodes.front().first.length();
    const size_t key_2_len = barcodes.front().second.length();

    // The final size is known up front; avoid repeated reallocation.
    sorted_barcodes.reserve(barcodes.size());

    size_t source_idx = 0;
    for (const auto& mates : barcodes) {
        if (mates.first.length() != key_1_len) {
            throw barcode_error("mate 1 barcodes do not have the same length");
        } else if (mates.second.length() != key_2_len) {
            throw barcode_error("mate 2 barcodes do not have the same length");
        }

        std::string barcode;
        barcode.reserve(key_1_len + key_2_len);
        barcode.append(mates.first.sequence());
        barcode.append(mates.second.sequence());

        sorted_barcodes.push_back(barcode_pair(barcode, source_idx));
        ++source_idx;
    }

    std::sort(sorted_barcodes.begin(), sorted_barcodes.end());

    return sorted_barcodes;
}
示例#2
0
/*
 * Adjusts sim->sigma towards the neighbouring value that yields the lowest
 * barcode_error().
 *
 * Candidates are start + k * step_size for every k in [-steps, steps] except
 * k == 0. Candidates below 1.0 are skipped. For each remaining candidate the
 * kernel is recomputed and sim->guess is convolved into sim->blur before the
 * error is measured. The baseline to beat is the error of the simulator as it
 * was on entry.
 *
 * On return sim->sigma is the blend
 *     best * factor + start * (1.0 - factor)
 * and the kernel and sim->blur have been recomputed for that value.
 */
void adjust_sigma(Simulator *sim, int steps, double step_size,
				  double factor)
{
	const double start_sigma = sim->sigma;
	double winning_sigma = start_sigma;
	/* Baseline: error of the simulator state as it was handed to us. */
	double lowest_error = barcode_error(sim);
	int offset;

	for (offset = -steps; offset <= steps; ++offset) {
		double candidate_error;

		/* Offset 0 is the starting sigma; its error is the baseline above. */
		if (offset == 0)
			continue;

		sim->sigma = start_sigma + offset * step_size;
		/* Candidates below 1.0 are never evaluated. */
		if (sim->sigma < 1.0)
			continue;

		/* Rebuild the kernel and blurred signal for this candidate. */
		compute_kernel(sim);
		convolve_1d(sim->kernel_width, sim->kernel,
					sim->width, sim->guess, sim->blur, 0, sim->width);

		candidate_error = barcode_error(sim);
		// printf("sigma=%.2f error=%.2f\n", sim->sigma, candidate_error);
		if (candidate_error < lowest_error) {
			lowest_error = candidate_error;
			winning_sigma = sim->sigma;
		}
	}

	/* Move only part of the way towards the winner, controlled by factor. */
	sim->sigma = winning_sigma * factor + start_sigma * (1.0 - factor);

	/* Leave the kernel and blur consistent with the final sigma. */
	compute_kernel(sim);
	convolve_1d(sim->kernel_width, sim->kernel,
				sim->width, sim->guess, sim->blur, 0, sim->width);
}
/**
 * Adds a nucleotide sequence with a given ID to a quad-tree.
 *
 * The walk starts at node 0 and descends one level per nucleotide, picking the
 * child slot via ACGT_TO_IDX. Missing children are created on the way. Every
 * node visited before the final one is tagged with barcode_id if it had no
 * value yet, and with barcode_table::ambigious otherwise; the final node is
 * always assigned barcode_id.
 *
 * @param tree       Vector-backed tree to extend; new nodes are appended.
 * @param sequence   Sequence to insert.
 * @param barcode_id ID stored for this sequence.
 * @throws barcode_error if the last step of the walk did not create a new
 *         node (reported as a duplicate barcode); this includes an empty
 *         sequence. Nodes already visited keep their updated values.
 */
void add_sequence_to_tree(demux_node_vec& tree, const std::string& sequence,
                          const size_t barcode_id)
{
    size_t cursor = 0;
    bool created_node = false;

    for (const char base : sequence) {
        // NOTE: this reference must not be used after the push_back below,
        // since growing the vector may invalidate it.
        auto& current = tree.at(cursor);

        // Record whether PE barcodes can be unambiguously identified from SE
        // reads: the first barcode passing through a node claims it, any
        // further barcode makes it ambiguous.
        if (current.value == barcode_table::no_match) {
            current.value = barcode_id;
        } else {
            current.value = barcode_table::ambigious;
        }

        const auto slot = ACGT_TO_IDX(base);
        auto next = current.children[slot];

        created_node = (next == barcode_table::no_match);
        if (created_node) {
            // Fresh nodes go at the back of the vector; because barcodes are
            // inserted in lexicographic order, similar barcodes tend to end
            // up in mostly contiguous runs of the vector representation.
            next = tree.size();
            current.children[slot] = next;
            tree.push_back(demultiplexer_node());
        }

        cursor = next;
    }

    // Reaching the end without creating a node means the full path already
    // existed in the tree.
    if (!created_node) {
        throw barcode_error(std::string("duplicate barcode (pair): ") +
                            sequence);
    }

    tree.at(cursor).value = barcode_id;
}