// Round-trips a BamHeader through an htslib bam_hdr_t:
//   1. build a header with the fluent setters,
//   2. serialise it with ToSam() and hand the text to sam_hdr_parse(),
//   3. attach a copy of the SAM text to the raw header,
//   4. rebuild a BamHeader from that raw text and compare it with the original.
TEST(BamHeaderTest, ExtractFromRawDataOk)
{
    ReadGroupInfo rg1("rg1");
    rg1.Sample("control");
    ReadGroupInfo rg2("rg2");
    rg2.Sample("condition1");
    ReadGroupInfo rg3("rg3");
    rg3.Sample("condition1");

    SequenceInfo seq1("chr1");
    seq1.Length("2038").Species("chocobo");
    SequenceInfo seq2("chr2");
    seq2.Length("3042").Species("chocobo");

    ProgramInfo prog1("_foo_");
    prog1.Name("ide");

    BamHeader header;
    header.Version("1.1")
          .SortOrder("queryname")
          .PacBioBamVersion("3.0.1")
          .AddReadGroup(rg1)
          .AddReadGroup(rg2)
          .AddReadGroup(rg3)
          .AddSequence(seq1)
          .AddSequence(seq2)
          .AddProgram(prog1)
          .AddComment("ipsum and so on")
          .AddComment("citation needed");

    // Note the expected ordering: @SQ records precede @RG records even though
    // the read groups were added to the header first.
    const string expectedText =
        "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
        "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
        "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
        "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\tPM:SEQUEL\n"
        "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
        "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
        "@PG\tID:_foo_\tPN:ide\n"
        "@CO\tipsum and so on\n"
        "@CO\tcitation needed\n";

    string text = header.ToSam();

    PBBAM_SHARED_PTR<bam_hdr_t> rawData(sam_hdr_parse(text.size(), text.c_str()),
                                        tests::BamHdrDeleter());
    // A failed parse yields a null header; stop here with a test failure
    // instead of dereferencing it below.
    ASSERT_TRUE(rawData.get() != nullptr);

    rawData->ignore_sam_err = 0;
    rawData->cigar_tab = nullptr;
    rawData->l_text = text.size();

    // One extra zero-filled byte keeps the copied text NUL-terminated.
    rawData->text = static_cast<char*>(calloc(rawData->l_text + 1, 1));
    // Guard the allocation as well: memcpy into a null buffer is undefined.
    ASSERT_TRUE(rawData->text != nullptr);
    memcpy(rawData->text, text.c_str(), rawData->l_text);

    const BamHeader newHeader = BamHeader(string(rawData->text, rawData->l_text));

    EXPECT_EQ(header.Version(),          newHeader.Version());
    EXPECT_EQ(header.SortOrder(),        newHeader.SortOrder());
    EXPECT_EQ(header.PacBioBamVersion(), newHeader.PacBioBamVersion());

    text = newHeader.ToSam();
    EXPECT_EQ(expectedText, text);
}
// Converts a BamHeader into an htslib bam_hdr_t wrapped in a shared pointer.
//
// The header is serialised with ToSam(), parsed by sam_hdr_parse(), and a
// copy of the SAM text is then attached to the resulting raw header
// (text / l_text). The shared pointer uses internal::HtslibHeaderDeleter
// as its deleter.
//
// NOTE(review): neither the sam_hdr_parse() result nor the calloc() result is
// checked for NULL before being used - confirm whether callers rely on that.
PBBAM_SHARED_PTR<bam_hdr_t> BamHeaderMemory::MakeRawHeader(const BamHeader& header)
{
    const string& samText = header.ToSam();

    PBBAM_SHARED_PTR<bam_hdr_t> rawHeader(sam_hdr_parse(samText.size(), samText.c_str()),
                                          internal::HtslibHeaderDeleter());

    bam_hdr_t* const hdr = rawHeader.get();
    hdr->ignore_sam_err = 0;
    hdr->cigar_tab = nullptr;

    // Attach a private copy of the text; the buffer is zero-filled and one
    // byte longer than the text, so it ends up NUL-terminated.
    hdr->l_text = samText.size();
    hdr->text = static_cast<char*>(calloc(hdr->l_text + 1, 1));
    memcpy(hdr->text, samText.c_str(), hdr->l_text);

    return rawHeader;
}
// Merging a header with a second header built from the very same SAM text
// must reproduce that text unchanged.
TEST(BamHeaderTest, MergeHandlesDuplicateReadGroups)
{
    const string samText(
        "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
        "@RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;"
        "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;"
        "SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\t"
        "PU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0\tPM:SEQUEL\n"
        "@PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0\n"
        "@PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2\n");

    // duplicate @RG:IDs handled ok (i.e. not duplicated in output)
    const BamHeader lhs(samText);
    const BamHeader rhs(samText);
    const BamHeader combined = lhs + rhs;

    EXPECT_EQ(samText, combined.ToSam());
}
// Populates a BamHeader piece by piece and checks the SAM text that
// ToSam() produces for it.
TEST(BamHeaderTest, EncodeTest)
{
    // Read groups: one "control" sample and two sharing "condition1".
    ReadGroupInfo controlGroup("rg1");
    controlGroup.Sample("control");
    ReadGroupInfo conditionGroupA("rg2");
    conditionGroupA.Sample("condition1");
    ReadGroupInfo conditionGroupB("rg3");
    conditionGroupB.Sample("condition1");

    // Reference sequences.
    SequenceInfo chromOne("chr1");
    chromOne.Length("2038");
    chromOne.Species("chocobo");
    SequenceInfo chromTwo("chr2");
    chromTwo.Length("3042");
    chromTwo.Species("chocobo");

    // Program record.
    ProgramInfo program("_foo_");
    program.Name("ide");

    BamHeader header;
    header.Version("1.1");
    header.SortOrder("queryname");
    header.PacBioBamVersion("3.0.1");
    header.AddReadGroup(controlGroup);
    header.AddReadGroup(conditionGroupA);
    header.AddReadGroup(conditionGroupB);
    header.AddSequence(chromOne);
    header.AddSequence(chromTwo);
    header.AddProgram(program);
    header.AddComment("ipsum and so on");
    header.AddComment("citation needed");

    // The expected text lists @SQ before @RG (the reverse of insertion order)
    // and carries PL/DS/PM fields on each @RG line that this test never sets.
    const string expectedSam(
        "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
        "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
        "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
        "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\tPM:SEQUEL\n"
        "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
        "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
        "@PG\tID:_foo_\tPN:ide\n"
        "@CO\tipsum and so on\n"
        "@CO\tcitation needed\n");

    const string actualSam = header.ToSam();
    EXPECT_EQ(expectedSam, actualSam);
}
// Merges two headers with different read groups, programs and comments, and
// checks the combined SAM text for both operator+ and operator+=.
TEST(BamHeaderTest, MergeOk)
{
    const string firstSam(
        "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
        "@RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;"
        "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;"
        "SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\t"
        "PU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0\t"
        "PM:SEQUEL\n"
        "@PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0\n"
        "@PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2\n"
        "@CO\tcomment1\n");

    const string secondSam(
        "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
        "@RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;"
        "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;"
        "PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;"
        "PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;"
        "BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;"
        "FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\t"
        "PM:SEQUEL\n"
        "@PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0\n"
        "@PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0\n"
        "@PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0\n"
        "@CO\tcomment2\n");

    // Expected result: a single @HD line, then the @RG, @PG and @CO records
    // of the first header followed by those of the second.
    const string expectedMerged(
        "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
        "@RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;"
        "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;"
        "SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\t"
        "PU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0\t"
        "PM:SEQUEL\n"
        "@RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;"
        "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;"
        "PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;"
        "PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;"
        "BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;"
        "FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\t"
        "PM:SEQUEL\n"
        "@PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0\n"
        "@PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2\n"
        "@PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0\n"
        "@PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0\n"
        "@PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0\n"
        "@CO\tcomment1\n"
        "@CO\tcomment2\n");

    // operator+
    {
        const BamHeader lhs(firstSam);
        const BamHeader rhs(secondSam);
        const BamHeader combined = lhs + rhs;
        EXPECT_EQ(expectedMerged, combined.ToSam());

        // also make sure inputs not changed
        EXPECT_EQ(firstSam,  lhs.ToSam());
        EXPECT_EQ(secondSam, rhs.ToSam());
    }

    // operator+=
    {
        BamHeader accumulated(firstSam);
        accumulated += BamHeader(secondSam);
        EXPECT_EQ(expectedMerged, accumulated.ToSam());
    }
}