Exemplo n.º 1
0
static void print_index_sequences(ostream& fout, Ebwt& ebwt)
{
    EList<string>* refnames = &(ebwt.refnames());

    TStr cat_ref;
    ebwt.restore(cat_ref);

    uint32_t curr_ref = 0xffffffff;
    string curr_ref_seq = "";
    uint32_t curr_ref_len = 0xffffffff;
    uint32_t last_text_off = 0;
    size_t orig_len = cat_ref.length();
    uint32_t tlen = 0xffffffff;
    bool first = true;
    for(size_t i = 0; i < orig_len; i++) {
        uint32_t tidx = 0xffffffff;
        uint32_t textoff = 0xffffffff;
        tlen = 0xffffffff;
        bool straddled = false;
        ebwt.joinedToTextOff(1 /* qlen */, (uint32_t)i, tidx, textoff, tlen, true, straddled);

        if (tidx != 0xffffffff && textoff < tlen)
        {
            if (curr_ref != tidx)
            {
                if (curr_ref != 0xffffffff)
                {
                    // Add trailing gaps, if any exist
                    if(curr_ref_seq.length() < curr_ref_len) {
                        curr_ref_seq += string(curr_ref_len - curr_ref_seq.length(), 'N');
                    }
                    print_fasta_record(fout, (*refnames)[curr_ref], curr_ref_seq);
                }
                curr_ref = tidx;
                curr_ref_seq = "";
                curr_ref_len = tlen;
                last_text_off = 0;
                first = true;
            }

            uint32_t textoff_adj = textoff;
            if(first && textoff > 0) textoff_adj++;
            if (textoff_adj - last_text_off > 1)
                curr_ref_seq += string(textoff_adj - last_text_off - 1, 'N');

            curr_ref_seq.push_back(cat_ref[i]);
            last_text_off = textoff;
            first = false;
        }
    }
    if (curr_ref < refnames->size())
    {
        // Add trailing gaps, if any exist
        if(curr_ref_seq.length() < curr_ref_len) {
            curr_ref_seq += string(curr_ref_len - curr_ref_seq.length(), 'N');
        }
        print_fasta_record(fout, (*refnames)[curr_ref], curr_ref_seq);
    }

}
Exemplo n.º 2
0
static void print_index_sequences(ostream& fout, Ebwt<index_t>& ebwt)
{
	EList<string>* refnames = &(ebwt.refnames());

	TStr cat_ref;
	ebwt.restore(cat_ref);

	HyperLogLogPlusMinus<uint64_t> kmer_counter;
	TIndexOffU curr_ref = OFF_MASK;
	string curr_ref_seq = "";
	TIndexOffU curr_ref_len = OFF_MASK;
	TIndexOffU last_text_off = 0;
	size_t orig_len = cat_ref.length();
	TIndexOffU tlen = OFF_MASK;
	bool first = true;
	for(size_t i = 0; i < orig_len; i++) {
		TIndexOffU tidx = OFF_MASK;
		TIndexOffU textoff = OFF_MASK;
		tlen = OFF_MASK;
		bool straddled = false;
		ebwt.joinedToTextOff(1 /* qlen */, (TIndexOffU)i, tidx, textoff, tlen, true, straddled);

		if (tidx != OFF_MASK && textoff < tlen)
		{
			if (curr_ref != tidx)
			{
				if (curr_ref != OFF_MASK)
				{
					// Add trailing gaps, if any exist
					if(curr_ref_seq.length() < curr_ref_len) {
						curr_ref_seq += string(curr_ref_len - curr_ref_seq.length(), 'N');
					}
					print_fasta_record(fout, (*refnames)[curr_ref], curr_ref_seq);
				}
				curr_ref = tidx;
				curr_ref_seq = "";
				curr_ref_len = tlen;
				last_text_off = 0;
				first = true;
			}

			TIndexOffU textoff_adj = textoff;
			if(first && textoff > 0) textoff_adj++;
			if (textoff_adj - last_text_off > 1)
				curr_ref_seq += string(textoff_adj - last_text_off - 1, 'N');

            curr_ref_seq.push_back("ACGT"[int(cat_ref[i])]);			
			last_text_off = textoff;
			first = false;
		}
	}
	if (curr_ref < refnames->size())
	{
		// Add trailing gaps, if any exist
		if(curr_ref_seq.length() < curr_ref_len) {
			curr_ref_seq += string(curr_ref_len - curr_ref_seq.length(), 'N');
		}
		print_fasta_record(fout, (*refnames)[curr_ref], curr_ref_seq);
	}

}
Exemplo n.º 3
0
void print_index_sequences(
    ostream& fout,
    Ebwt<TStr>& ebwt,
    const BitPairReference& refs)
{
    vector<string>* refnames = &(ebwt.refnames());

    TStr cat_ref;
    ebwt.restore(cat_ref);

    TIndexOffU curr_ref = OFF_MASK;
    string curr_ref_seq = "";
    TIndexOffU curr_ref_len = OFF_MASK;
    uint32_t last_text_off = 0;
    size_t orig_len = seqan::length(cat_ref);
    TIndexOffU tlen = OFF_MASK;
    bool first = true;
    for(size_t i = 0; i < orig_len; i++) {
        TIndexOffU tidx = OFF_MASK;
        TIndexOffU textoff = OFF_MASK;
        tlen = OFF_MASK;

        ebwt.joinedToTextOff(1 /* qlen */, (TIndexOffU)i, tidx, textoff, tlen);

        if (tidx != OFF_MASK && textoff < tlen)
        {
            if (curr_ref != tidx)
            {
                if (curr_ref != OFF_MASK)
                {
                    // Add trailing gaps, if any exist
                    if(curr_ref_seq.length() < curr_ref_len) {
                        curr_ref_seq += string(curr_ref_len - curr_ref_seq.length(), 'N');
                    }
                    print_fasta_record(fout, (*refnames)[curr_ref], curr_ref_seq);
                }
                curr_ref = tidx;
                curr_ref_seq = "";
                curr_ref_len = tlen;
                last_text_off = 0;
                first = true;
            }

            TIndexOffU textoff_adj = textoff;
            if(first && textoff > 0) textoff_adj++;
            if (textoff_adj - last_text_off > 1)
                curr_ref_seq += string(textoff_adj - last_text_off - 1, 'N');

            curr_ref_seq.push_back(getValue(cat_ref,i));
            last_text_off = textoff;
            first = false;
        }
    }
    if (curr_ref < refnames->size())
    {
        // Add trailing gaps, if any exist
        if(curr_ref_seq.length() < curr_ref_len) {
            curr_ref_seq += string(curr_ref_len - curr_ref_seq.length(), 'N');
        }
        print_fasta_record(fout, (*refnames)[curr_ref], curr_ref_seq);
    }

}