// Adding a spliced alignment (CIGAR 2M 3N 2M placed at position 3) must
// raise coverage only under the two 'M' segments; the 'N' gap between them
// and the bases before the alignment stay at zero.
TEST(CoverageTest, Coverage_add_alignment) {
    Alignment read;
    read.RefName = "foo";
    read.position(3);

    // CIGAR segments, in order: 2 matched, 3 skipped, 2 matched.
    const char segment_types[] = {'M', 'N', 'M'};
    const unsigned int segment_lengths[] = {2, 3, 2};
    const unsigned int segment_count =
        sizeof(segment_types) / sizeof(segment_types[0]);

    CigarOp segment;
    read.CigarData.clear();
    for (unsigned int i = 0; i < segment_count; ++i) {
        segment.Type = segment_types[i];
        segment.Length = segment_lengths[i];
        read.CigarData.push_back(segment);
    }

    Coverage coverage;
    coverage.add(read);

    EXPECT_THAT(coverage.coverages.find("foo")->second,
                ElementsAre(0, 0, 1, 1, 0, 0, 0, 1, 1));
}
// Exercises Coverage::add(name, start, length) and setMinReferenceLength()
// on a single reference, checking the per-position counts after each step.
TEST(CoverageTest, Coverage_add) {
    Coverage coverage;

    // First interval: positions 2..4 each gain one.
    coverage.add("foo", 2, 3);
    EXPECT_THAT(coverage.coverages.find("foo")->second,
                ElementsAre(0, 1, 1, 1));

    // Overlapping interval: positions 2..3 are incremented again.
    coverage.add("foo", 2, 2);
    EXPECT_THAT(coverage.coverages.find("foo")->second,
                ElementsAre(0, 2, 2, 1));

    // Interval past the current end: the vector grows, the gap stays zero.
    coverage.add("foo", 6, 2);
    EXPECT_THAT(coverage.coverages.find("foo")->second,
                ElementsAre(0, 2, 2, 1, 0, 1, 1));

    // Raising the minimum length pads the tail with zeros.
    coverage.setMinReferenceLength("foo", 10);
    EXPECT_THAT(coverage.coverages.find("foo")->second,
                ElementsAre(0, 2, 2, 1, 0, 1, 1, 0, 0, 0));
}
// Checks the text serialisation produced by both toOutputStream() and
// toString(): per reference, a "name<TAB>length" header line followed by one
// count per line.
TEST(CoverageTest, toString) {
    // Note that references are output in sorted order on reference name,
    // so "bar" is expected before "foo" even though "foo" is set up first.
    Coverage coverage;
    coverage.setMinReferenceLength("foo", 5);
    coverage.setMinReferenceLength("bar", 6);

    coverage.add("foo", 3, 2);
    coverage.add("bar", 1, 3);

    // Adjacent literals are concatenated by the compiler; one per reference.
    std::string expected =
        "bar\t6\n1\n1\n1\n0\n0\n0\n"
        "foo\t5\n0\n0\n1\n1\n0\n";

    std::stringstream stream_output;
    coverage.toOutputStream(stream_output);
    EXPECT_EQ(expected, stream_output.str());

    std::string string_output;
    coverage.toString(string_output);
    EXPECT_EQ(expected, string_output);
}
int main (int argc, char* argv[]) { string gff_file_path, bam_file_path, output_file_path; vector<string> stack_file_paths; // TODO allow multiple bam files? try { TCLAP::CmdLine cmd("Program description", ' ', VERSION); TCLAP::MultiArg<string> inputSTACKS("s", "stack-file", "Stack file", false, "foo.stacks", cmd); TCLAP::ValueArg<string> inputGFF("g", "gff-file", "Input GFF file", true, "", "input_file.gff", cmd); TCLAP::ValueArg<string> inputBAM("b", "bam-file", "Input BAM file", true, "", "input_file.bam", cmd); TCLAP::ValueArg<string> outputFileArg("o", "output", "Output file", true, "", "output.coverage", cmd); cmd.parse(argc, argv); gff_file_path = inputGFF.getValue(); bam_file_path = inputBAM.getValue(); stack_file_paths = inputSTACKS.getValue(); output_file_path = outputFileArg.getValue(); } catch (TCLAP::ArgException &e) { cerr << "Error: " << e.error() << " " << e.argId() << endl; } std::ostream* output_stream; std::ofstream output_file_stream; if (output_file_path == "-") { cerr << "Outputting to standard out." << endl; output_stream = &cout; } else { output_file_stream.open(output_file_path.c_str(), std::ios::out | std::ios::trunc); if (!output_file_stream.is_open()) { cerr << "Error opening output file. Exiting." << endl; return 0; } output_stream = &output_file_stream; } BamReader reader; if(!reader.Open(bam_file_path)) { cerr << "Error opening the bam file. Exiting." << endl; return 0; } cerr << "Loading the reference GFF." << endl; // open GFF reference file std::ifstream gff_stream(gff_file_path.c_str()); if (!gff_stream.is_open()) { cerr << "Error opening reference GFF file. Exiting." << endl; return 0; } cerr << "Loading splice junctions from reference GFF." 
<< endl; UniquePositionIndex junctions; ChildrenIndex exon_index; GFFReader gff_reader(gff_stream); Feature f; while (gff_reader.read(f)) { if (f.isExonType()) { exon_index.add(f); } } vector<string> IDs; exon_index.parentIDs(IDs); for (vector<string>::iterator ID = IDs.begin(); ID != IDs.end(); ++ID) { vector<Feature> exons; vector<Feature> juncs; exon_index.childrenOf(*ID, exons); getJunctions(exons, juncs); // TODO it's wasteful to have a juncs vector that just gets moved to the index // it'd be nicer if the index implemented the same interface as the vector // this means giving indexes iterators? // c++ you're so complicated... for (vector<Feature>::iterator junc = juncs.begin(); junc != juncs.end(); ++junc) { junctions.add(*junc); } } cerr << "Loading splice junctions from stack files." << endl; // load splice junctions from stack files for (vector<string>::iterator it = stack_file_paths.begin(); it != stack_file_paths.end(); ++it) { std::ifstream stack_stream(it->c_str()); if (!stack_stream.is_open()) { cerr << "Error opening stack file: " << *it << endl; cerr << "Skipping file." << endl; } else { StackReader stack_reader(stack_stream); Feature j; while (stack_reader.read(j)) { junctions.add(j); } } } cerr << "Found " << junctions.count(); cerr << " unique splice junctions." << endl; Coverage coverage; cerr << "Reading alignments and building coverage." 
<< endl; // initialize references BamTools::RefVector ref_vec = reader.GetReferenceData(); for (int i = 0; i < ref_vec.size(); ++i) { BamTools::RefData data = ref_vec.at(i); coverage.setMinReferenceLength(data.RefName, data.RefLength); } // read and filter alignments, adding to coverages Alignment al, mate; Feature junction; while (reader.GetNextAlignment(al)) { if (al.IsPaired()) { bool valid = true; if (al.getJunction(junction)) valid = junctions.contains(junction); reader.GetNextAlignment(mate); if (mate.getJunction(junction)) valid = valid && junctions.contains(junction); if (valid) { coverage.add(al); coverage.add(mate); } } else { if (al.getJunction(junction)) coverage.add(al); else coverage.add(al); } } reader.Close(); cerr << "Writing coverage file." << endl; coverage.toOutputStream(*output_stream); return 0; }