// Expectations for a default-constructed BamHeader: every accessor reports
// an empty value, and every by-key / by-index lookup throws.
TEST(BamHeaderTest, DefaultConstruction)
{
    BamHeader emptyHeader;

    // top-level fields
    EXPECT_TRUE(emptyHeader.Version().empty());
    EXPECT_TRUE(emptyHeader.SortOrder().empty());  // open question: default to unknown ?

    // record collections
    EXPECT_TRUE(emptyHeader.ReadGroups().empty());
    EXPECT_TRUE(emptyHeader.Sequences().empty());
    EXPECT_TRUE(emptyHeader.Programs().empty());
    EXPECT_TRUE(emptyHeader.Comments().empty());

    // lookups by ID/name have nothing to find
    EXPECT_THROW(emptyHeader.Program("foo"), std::exception);
    EXPECT_THROW(emptyHeader.ReadGroup("foo"), std::exception);
    EXPECT_THROW(emptyHeader.SequenceId("foo"), std::exception);

    // lookups by numeric sequence ID have nothing to find either
    EXPECT_THROW(emptyHeader.SequenceLength(42), std::exception);
    EXPECT_THROW(emptyHeader.SequenceName(42), std::exception);
}
// Builds a BamHeader from SAM-formatted header text and checks that each
// record type in the text (@HD, @SQ, @RG, @PG, @CO) is exposed through the
// corresponding accessor.
TEST(BamHeaderTest, DecodeTest)
{
    // Owned by value: no need to bind a reference to a temporary string.
    const string text = "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
                        "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
                        "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
                        "@RG\tID:rg1\tSM:control\n"
                        "@RG\tID:rg2\tSM:condition1\n"
                        "@RG\tID:rg3\tSM:condition1\n"
                        "@PG\tID:_foo_\tPN:ide\n"
                        "@CO\tipsum and so on\n"
                        "@CO\tcitation needed\n";

    BamHeader header(text);

    // @HD
    EXPECT_EQ(string("1.1"),       header.Version());
    EXPECT_EQ(string("queryname"), header.SortOrder());
    EXPECT_EQ(string("3.0.1"),     header.PacBioBamVersion());

    // @RG
    // Unsigned literals below keep the size() comparisons free of
    // signed/unsigned mismatch warnings.
    EXPECT_EQ(3u, header.ReadGroups().size());
    EXPECT_TRUE(header.HasReadGroup("rg1"));
    EXPECT_TRUE(header.HasReadGroup("rg2"));
    EXPECT_TRUE(header.HasReadGroup("rg3"));
    EXPECT_EQ(string("control"),    header.ReadGroup("rg1").Sample());
    EXPECT_EQ(string("condition1"), header.ReadGroup("rg2").Sample());
    EXPECT_EQ(string("condition1"), header.ReadGroup("rg3").Sample());

    // @SQ
    EXPECT_EQ(2u, header.Sequences().size());
    EXPECT_TRUE(header.HasSequence("chr1"));
    EXPECT_TRUE(header.HasSequence("chr2"));
    EXPECT_EQ(string("chocobo"), header.Sequence("chr1").Species());
    EXPECT_EQ(string("chocobo"), header.Sequence("chr2").Species());
    EXPECT_EQ(string("2038"),    header.Sequence("chr1").Length());
    EXPECT_EQ(string("3042"),    header.Sequence("chr2").Length());

    // @PG
    EXPECT_EQ(1u, header.Programs().size());
    EXPECT_TRUE(header.HasProgram("_foo_"));
    EXPECT_EQ(string("ide"), header.Program("_foo_").Name());

    // @CO (checked in the order they appear in the text)
    EXPECT_EQ(2u, header.Comments().size());
    EXPECT_EQ(string("ipsum and so on"), header.Comments().at(0));
    EXPECT_EQ(string("citation needed"), header.Comments().at(1));
}
// Copies the records of test2BamFn into a temporary BAM file while feeding
// each record, together with the virtual offset reported by the writer, to a
// PbiBuilder. Then checks:
//   - the offsets reported during writing match the expected list,
//   - both the original and the new BAM can be seeked/read at their offsets,
//   - the PBI data loaded from the generated file matches the expected index.
TEST(PacBioIndexTest, CreateOnTheFly)
{
    // do this in temp directory, so we can ensure write access
    const string tempDir   = tests::GeneratedData_Dir + "/";
    const string tempBamFn = tempDir + "temp.bam";
    const string tempPbiFn = tempBamFn + ".pbi";

    // NOTE: new file differs in size than existing (different write parameters
    //       may yield different file sizes, even though content is same)
    const vector<int64_t> expectedNewOffsets = {
        33816576,  236126208,  391315456,  469106688,  537067520,
        587792384, 867303424, 1182793728, 1449787392, 1582628864
    };
    vector<int64_t> observedOffsets;

    // create PBI on the fly from input BAM while we write to new file
    // (scoped so that writer & builder are destroyed before the files are read back)
    {
        BamFile bamFile(test2BamFn);
        BamHeader header = bamFile.Header();

        BamWriter writer(tempBamFn, header); // default compression, default thread count
        PbiBuilder builder(tempPbiFn, header.Sequences().size());

        int64_t vOffset = 0;
        EntireFileQuery entireFile(bamFile);
        for (const BamRecord& record : entireFile) {
            writer.Write(record, &vOffset);
            builder.AddRecord(record, vOffset);
            observedOffsets.push_back(vOffset);
        }
    }

    EXPECT_EQ(expectedNewOffsets, observedOffsets);

    // sanity check on original file
    {
        const vector<int64_t> originalFileOffsets = {
            33816576, 33825163, 33831333, 33834264, 33836542,
            33838065, 33849818, 33863499, 33874621, 1392836608
        };
        BamRecord r;
        BamReader reader(test2BamFn);

        // Count is converted once so the loop compares int with int, while
        // CanRead still receives an int index as before.
        const int numOriginal = static_cast<int>(originalFileOffsets.size());
        for (int i = 0; i < numOriginal; ++i) {
            reader.VirtualSeek(originalFileOffsets.at(i));
            EXPECT_TRUE(CanRead(reader, r, i));
        }
    }

    // attempt to seek in our new file using both expected & observed offsets
    {
        BamRecord r;
        BamReader reader(tempBamFn);

        const int numExpected = static_cast<int>(expectedNewOffsets.size());
        for (int i = 0; i < numExpected; ++i) {
            reader.VirtualSeek(expectedNewOffsets.at(i));
            EXPECT_TRUE(CanRead(reader, r, i));
        }

        const int numObserved = static_cast<int>(observedOffsets.size());
        for (int i = 0; i < numObserved; ++i) {
            reader.VirtualSeek(observedOffsets.at(i));
            EXPECT_TRUE(CanRead(reader, r, i));
        }
    }

    // compare data in new PBI file, to expected data
    const PbiRawData& expectedIndex = tests::Test2Bam_NewIndex();
    const PbiRawData fromBuilt(tempPbiFn);
    tests::ExpectRawIndicesEqual(expectedIndex, fromBuilt);

    // straight diff of newly-generated PBI file to existing PBI
    // TODO: Come back to this once pbindexump is in place.
    //       We can't exactly do this since file offsets may differ between 2 BAMs of
    //       differing compression levels. Should add some sort of BAM checksum based
    //       on contents, not just size, for this reason.
    // const string pbiDiffCmd = string("diff -q ") + test2BamFn + ".pbi " + tempPbiFn;
    // EXPECT_EQ(0, system(pbiDiffCmd.c_str()));

    // clean up temp file(s)
    remove(tempBamFn.c_str());
    remove(tempPbiFn.c_str());
}