예제 #1
0
/** Function get_seeds()
 *
 * Walk the two fq files f and f2 batch by batch and pass every batch of
 * reads to generate_seeds(), which receives list_seeds and seed_len
 * (per the original description: rID -> (s0, s1, s2...) per fragment).
 *
 * Each round requests batch/2 reads from f followed by batch/2 reads
 * from f2; the loop ends as soon as either input is exhausted.
 * Unless silent is set, the number of fragments seen (half the number
 * of reads collected) is reported on stdout.
 */
void get_seeds (ii64vec_t& list_seeds, const std::string& f,
		const std::string& f2, int seed_len, int batch, bool silent) {

	// open both inputs first, then attach the fastq iterators
	std::ifstream in_first;
	std::ifstream in_second;
	xny::openfile<std::ifstream>(in_first, f);
	xny::openfile<std::ifstream>(in_second, f2);

	bio::fastq_input_iterator<> it_first(in_first);
	bio::fastq_input_iterator<> it_stop;	// default-constructed end marker
	bio::fastq_input_iterator<> it_second(in_second);

	const int reads_per_file = batch / 2;
	int frag_count = 0;
	strvec_t batch_reads;

	// the buffer is emptied after every round so each batch starts fresh
	for (; it_first != it_stop && it_second != it_stop; batch_reads.clear()) {

		add_fq_reads_only (batch_reads, reads_per_file, it_first, it_stop);
		add_fq_reads_only (batch_reads, reads_per_file, it_second, it_stop);

		generate_seeds (list_seeds, batch_reads, seed_len);

		// two reads make up one fragment
		frag_count += batch_reads.size() / 2;

	} // for

	if (silent == false) {
		std::cout << "\t\ttotal frags: " << frag_count << "\n";
	}

	xny::closefile(in_first);
	xny::closefile(in_second);
} // get_seeds
예제 #2
0
/**	Function get_super_sketches
 *
 */
void get_super_sketches (std::vector<sketch_t>& super_sketches,
	const std::string& f, const std::string& f2, xny::sketch_list& slistgen,
	xny::super_sketch& ssgen, jaz::murmur264& hashfunc, int batch, bool silent) {

	std::ifstream fh, fh2;
	xny::openfile<std::ifstream>(fh, f);
	xny::openfile<std::ifstream>(fh2, f2);
	bio::fastq_input_iterator<> fq(fh), end, fq2(fh2);

	int total_read_pairs = 0;
	strvec_t pairs;

	while (fq != end && fq2 != end) {

 		add_fq_reads_only (pairs, batch/2, fq, end);
		add_fq_reads_only (pairs, batch/2, fq2, end);

		generate_super_sketches (super_sketches, pairs, slistgen,
				ssgen, hashfunc);

		total_read_pairs += pairs.size()/2;

		pairs.clear();

	} // while

	if (!silent) {
		std::cout << "\t\t\ttotal pairs, super_sketches: " << total_read_pairs
				<< ", " 	<< super_sketches.size() <<  "\n";
	}

	xny::closefile(fh);
	xny::closefile(fh2);
} // get_super_sketches
예제 #3
0
/**	Function clean_dupl_frag ()
 *
 */
void clean_dupl_frag (const std::string& ifq, const std::string& ifq2,
	std::ofstream& ofhfq, std::ofstream& ofhfq2, const iset_t& duplIDs,
	xny::low_complexity& lc, int batch){

	std::ifstream ifhfq, ifhfq2;
	xny::openfile<std::ifstream>(ifhfq, ifq);
	xny::openfile<std::ifstream>(ifhfq2, ifq2);
	bio::fastq_input_iterator<> fq(ifhfq), end, fq2(ifhfq2);

	int total_read_pairs = 0;
	std::vector<fqtuple_t> pairs;

	int num_lc = 0;
	int fragID = 0;
	while (fq != end && fq2 != end) {

 		add_fq_reads (pairs, batch/2, fq, end);
		add_fq_reads (pairs, batch/2, fq2, end);
		iset_t low_complex_frag;
		check_low_complexity (low_complex_frag, fragID, pairs, lc);
		num_lc += low_complex_frag.size();
		int fragnum = pairs.size()/2;
		for (int i = 0; i < fragnum; ++ i) {
			if ( (!duplIDs.count(fragID)) &&
				 (!low_complex_frag.count(fragID))) { // output
				ofhfq << "@" << std::get<0>(pairs[i]) << "\n";
				ofhfq << std::get<1>(pairs[i]) << "\n";
				ofhfq << "+\n";
				ofhfq << std::get<2>(pairs[i]) << "\n";

				ofhfq2 << "@" << std::get<0>(pairs[i + fragnum]) << "\n";
				ofhfq2 << std::get<1>(pairs[i + fragnum]) << "\n";
				ofhfq2 << "+\n";
				ofhfq2 << std::get<2>(pairs[i + fragnum]) << "\n";
			}
			++ fragID;
		}

		total_read_pairs += pairs.size()/2;

		pairs.clear();

	} // while

	std::cout << "\t\tlow complexity fragments: " << num_lc << "\n\n";
	xny::closefile(ifhfq);
	xny::closefile(ifhfq2);

} // clean_dupl_frag
예제 #4
0
void BamToFastq::PairedFastq() {
    // open the 1st fastq file for writing
    ofstream fq1(_fastq1.c_str(), ios::out);
    if ( !fq1 ) {
        cerr << "Error: The first fastq file (" << _fastq1 << ") could not be opened.  Exiting!" << endl;
        exit (1);
    }
    // open the 2nd fastq file for writing
    ofstream fq2(_fastq2.c_str(), ios::out);
    if ( !fq2 ) {
        cerr << "Error: The second fastq file (" << _fastq2 << ") could not be opened.  Exiting!" << endl;
        exit (1);
    }
    // open the BAM file
    BamReader reader;
    reader.Open(_bamFile);
    // rip through the BAM file and convert each mapped entry to BEDPE
    BamAlignment bam1, bam2;
    bool shouldConsumeReads = true;
    while (true) {
        
        if (shouldConsumeReads) {
            if (!reader.GetNextAlignment(bam1) || !reader.GetNextAlignment(bam2)) break;
        } else {
            shouldConsumeReads = true;
        }
        if (bam1.Name != bam2.Name) {
            while (bam1.Name != bam2.Name)
            {
                if (bam1.IsPaired()) 
                {
                    cerr << "*****WARNING: Query " << bam1.Name
                         << " is marked as paired, but its mate does not occur"
                         << " next to it in your BAM file.  Skipping. " << endl;
                }
                bam1 = bam2;
                if (!reader.GetNextAlignment(bam2)) break;
                shouldConsumeReads = false;
            }
        }
        else if (bam1.IsPaired() && bam2.IsPaired()) {
            // extract the sequence and qualities for the BAM "query"
            string seq1  = bam1.QueryBases;
            string qual1 = bam1.Qualities;
            string seq2  = bam2.QueryBases;
            string qual2 = bam2.Qualities;
            if (bam1.IsReverseStrand() == true) {
                reverseComplement(seq1);
                reverseSequence(qual1);
            }
            if (bam2.IsReverseStrand() == true) {
                reverseComplement(seq2);
                reverseSequence(qual2);
            }
            fq1 << "@" << bam1.Name << "/1" << endl;
            fq1 << seq1 << endl;
            fq1 << "+" << endl;
            fq1 << qual1 << endl;
            
            fq2 << "@" << bam2.Name << "/2" << endl;
            fq2 << seq2 << endl;
            fq2 << "+" << endl;
            fq2 << qual2 << endl;
        }
    }
    reader.Close();
}
예제 #5
0
void BamToFastq::PairedFastqUseTags() {

    // open the 1st fastq file for writing
    ofstream fq1(_fastq1.c_str(), ios::out);
    if ( !fq1 ) {
        cerr << "Error: The first fastq file (" << _fastq1 << ") could not be opened.  Exiting!" << endl;
        exit (1);
    }
    // open the 2nd fastq file for writing
    ofstream fq2(_fastq2.c_str(), ios::out);
    if ( !fq2 ) {
        cerr << "Error: The second fastq file (" << _fastq2 << ") could not be opened.  Exiting!" << endl;
        exit (1);
    }

    // open the BAM file
    BamReader reader;
    reader.Open(_bamFile);
    // rip through the BAM file and convert each mapped entry to BEDPE
    BamAlignment bam1, bam2;
    while (reader.GetNextAlignment(bam1)) {
        
        reader.GetNextAlignment(bam2);        
        if (bam1.Name != bam2.Name) {
            while (bam1.Name != bam2.Name)
            {
                if (bam1.IsPaired()) 
                {
                    cerr << "*****WARNING: Query " << bam1.Name
                         << " is marked as paired, but it's mate does not occur"
                         << " next to it in your BAM file.  Skipping. " << endl;
                }
                bam1 = bam2;
                reader.GetNextAlignment(bam2);
            }
        }
        else if (bam1.IsPaired() && bam2.IsPaired()) {
            // assume the R2 and Q2 tags are on the + strand.
            string mateSequence, mateQualities;
            bam1.GetTag("R2", mateSequence);
            bam1.GetTag("Q2", mateQualities);

            string seq1  = bam1.QueryBases;
            string qual1 = bam1.Qualities;
            if (bam1.IsReverseStrand() == true) {
                reverseComplement(seq1);
                reverseSequence(qual1);
            }
            
            // since the info for both ends are contained in each BAM record,
            // we only need to process one of the two records (bam1) in order
            // to produce FASTQ entries for both ends.
            // NOTE: Assumes that R2 and Q2 have already been rev 
            //      and revcomped if necessary
            if (bam1.IsFirstMate() == true) {
                // end1
                fq1 << "@" << bam1.Name << "/1" << endl;
                fq1 << seq1 << endl;
                fq1 << "+" << endl;
                fq1 << qual1 << endl;
                // end2
                fq2 << "@" << bam1.Name << "/2" <<endl;
                fq2 << mateSequence << endl;
                fq2 << "+" << endl;
                fq2 << mateQualities << endl;
            }
            else {
                // end 2
                fq2 << "@" << bam1.Name << "/2" <<endl;
                fq2 << seq1 << endl;
                fq2 << "+" << endl;
                fq2 << qual1 << endl;
                // end 1
                fq1 << "@" << bam1.Name << "/1" <<endl;
                fq1 << mateSequence << endl;
                fq1 << "+" << endl;
                fq1 << mateQualities << endl;
            }
        }
    }
    reader.Close();
}