// Round-trip check: a BamHeader is serialized to SAM text, that text is parsed
// by htslib into a raw bam_hdr_t (with a copy of the text attached), and a new
// BamHeader is rebuilt from the raw header's text. The rebuilt header must
// report the same @HD fields and serialize to the expected SAM text.
TEST(BamHeaderTest, ExtractFromRawDataOk)
{
    ReadGroupInfo rg1("rg1");
    rg1.Sample("control");
    ReadGroupInfo rg2("rg2");
    rg2.Sample("condition1");
    ReadGroupInfo rg3("rg3");
    rg3.Sample("condition1");

    SequenceInfo seq1("chr1");
    seq1.Length("2038").Species("chocobo");
    SequenceInfo seq2("chr2");
    seq2.Length("3042").Species("chocobo");

    ProgramInfo prog1("_foo_");
    prog1.Name("ide");

    BamHeader header;
    header.Version("1.1")
          .SortOrder("queryname")
          .PacBioBamVersion("3.0.1")
          .AddReadGroup(rg1)
          .AddReadGroup(rg2)
          .AddReadGroup(rg3)
          .AddSequence(seq1)
          .AddSequence(seq2)
          .AddProgram(prog1)
          .AddComment("ipsum and so on")
          .AddComment("citation needed");

    // Note: the expected text lists @SQ before @RG even though read groups were
    // added first, and contains PL/DS/PM fields this test never sets explicitly.
    const string expectedText = "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
                                "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
                                "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
                                "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\tPM:SEQUEL\n"
                                "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
                                "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
                                "@PG\tID:_foo_\tPN:ide\n"
                                "@CO\tipsum and so on\n"
                                "@CO\tcitation needed\n";

    string text = header.ToSam();

    // Check the parse result BEFORE dereferencing it, so a parser failure shows
    // up as a failed test rather than a crash of the whole test binary. The
    // check happens on the raw pointer so the deleter is never handed NULL.
    bam_hdr_t* parsed = sam_hdr_parse(text.size(), text.c_str());
    ASSERT_TRUE(parsed != nullptr);
    PBBAM_SHARED_PTR<bam_hdr_t> rawData(parsed, tests::BamHdrDeleter());

    rawData->ignore_sam_err = 0;
    rawData->cigar_tab = nullptr;
    rawData->l_text = text.size();
    // calloc zero-fills, so the extra byte leaves the copied text NUL-terminated.
    rawData->text = static_cast<char*>(calloc(rawData->l_text + 1, 1));
    ASSERT_TRUE(rawData->text != nullptr);
    memcpy(rawData->text, text.c_str(), rawData->l_text);

    const BamHeader newHeader = BamHeader(string(rawData->text, rawData->l_text));

    EXPECT_EQ(header.Version(),          newHeader.Version());
    EXPECT_EQ(header.SortOrder(),        newHeader.SortOrder());
    EXPECT_EQ(header.PacBioBamVersion(), newHeader.PacBioBamVersion());

    text = newHeader.ToSam();
    EXPECT_EQ(expectedText, text);
}
Beispiel #2
0
// Builds an htslib raw header (bam_hdr_t) from a BamHeader.
//
// The header is serialized to SAM text, parsed with sam_hdr_parse(), and a
// NUL-terminated heap copy of that text is stored in the raw header's
// text/l_text fields. The returned pointer is managed with
// internal::HtslibHeaderDeleter.
//
// Returns an empty pointer if htslib cannot parse the text or if the text copy
// cannot be allocated (previously both cases dereferenced/wrote through NULL).
// NOTE(review): callers must now be prepared for an empty result; if the
// project prefers exceptions here, throwing would be a reasonable alternative.
PBBAM_SHARED_PTR<bam_hdr_t> BamHeaderMemory::MakeRawHeader(const BamHeader& header)
{
    const string text = header.ToSam();

    // Validate the parse result on the raw pointer, before it is wrapped, so
    // the deleter is never invoked on NULL.
    bam_hdr_t* parsed = sam_hdr_parse(text.size(), text.c_str());
    if (parsed == nullptr)
        return PBBAM_SHARED_PTR<bam_hdr_t>();

    PBBAM_SHARED_PTR<bam_hdr_t> rawData(parsed, internal::HtslibHeaderDeleter());
    rawData->ignore_sam_err = 0;
    rawData->cigar_tab = nullptr;
    rawData->l_text = text.size();

    // calloc zero-fills, so the one extra byte keeps the copy NUL-terminated.
    rawData->text = static_cast<char*>(calloc(rawData->l_text + 1, 1));
    if (rawData->text == nullptr)
        return PBBAM_SHARED_PTR<bam_hdr_t>(); // rawData's deleter releases the parsed header

    memcpy(rawData->text, text.c_str(), rawData->l_text);
    return rawData;
}
// Merging two headers built from the same SAM text must yield that same text:
// the shared @RG ID (and the shared @PG records) may appear only once.
TEST(BamHeaderTest, MergeHandlesDuplicateReadGroups)
{
    const string samText(
        "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
        "@RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;"
            "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;"
            "SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\t"
            "PU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0\tPM:SEQUEL\n"
        "@PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0\n"
        "@PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2\n"
    );

    // two independent headers parsed from identical text
    const BamHeader lhs(samText);
    const BamHeader rhs(samText);

    // identical inputs => merged output should be indistinguishable from either input
    const BamHeader combined = lhs + rhs;
    EXPECT_EQ(samText, combined.ToSam());
}
// Assembles a BamHeader through its setter/adder API and verifies the SAM text
// produced by ToSam() against a hand-written expectation.
TEST(BamHeaderTest, EncodeTest)
{
    // three read groups; the last two share a sample name
    ReadGroupInfo controlGroup("rg1");
    controlGroup.Sample("control");
    ReadGroupInfo conditionGroupA("rg2");
    conditionGroupA.Sample("condition1");
    ReadGroupInfo conditionGroupB("rg3");
    conditionGroupB.Sample("condition1");

    // two reference sequences
    SequenceInfo firstChrom("chr1");
    firstChrom.Length("2038").Species("chocobo");
    SequenceInfo secondChrom("chr2");
    secondChrom.Length("3042").Species("chocobo");

    // one program record
    ProgramInfo program("_foo_");
    program.Name("ide");

    // populate the header, one record category per statement
    BamHeader header;
    header.Version("1.1").SortOrder("queryname").PacBioBamVersion("3.0.1");
    header.AddReadGroup(controlGroup).AddReadGroup(conditionGroupA).AddReadGroup(conditionGroupB);
    header.AddSequence(firstChrom).AddSequence(secondChrom);
    header.AddProgram(program);
    header.AddComment("ipsum and so on").AddComment("citation needed");

    // Expected output lists @SQ ahead of @RG regardless of the order added above,
    // and carries PL/DS/PM fields that were never set explicitly in this test.
    const string expectedSam =
        "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
        "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
        "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
        "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\tPM:SEQUEL\n"
        "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
        "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
        "@PG\tID:_foo_\tPN:ide\n"
        "@CO\tipsum and so on\n"
        "@CO\tcitation needed\n";

    EXPECT_EQ(expectedSam, header.ToSam());
}
// Merges two headers with distinct read groups, programs and comments, via both
// operator+ and operator+=, and compares the result with the expected SAM text.
TEST(BamHeaderTest, MergeOk)
{
    // first input: SUBREAD read group, two @PG records, one comment
    const string firstText(
        "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
        "@RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;"
            "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;"
            "SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\t"
            "PU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0\tPM:SEQUEL\n"
        "@PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0\n"
        "@PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2\n"
        "@CO\tcomment1\n"
    );

    // second input: SCRAP read group, three @PG records, one comment
    const string secondText(
        "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
        "@RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;"
            "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;"
            "PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;"
            "PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;"
            "BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;"
            "FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL\n"
        "@PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0\n"
        "@PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0\n"
        "@PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0\n"
        "@CO\tcomment2\n"
    );

    // expected merge: one @HD, then both @RG, all five @PG, both @CO
    const string expectedMerge(
        "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
        "@RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;"
            "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;"
            "SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\t"
            "PU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0\tPM:SEQUEL\n"
        "@RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;"
            "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;"
            "PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;"
            "PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;"
            "BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;"
            "FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL\n"
        "@PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0\n"
        "@PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2\n"
        "@PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0\n"
        "@PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0\n"
        "@PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0\n"
        "@CO\tcomment1\n"
        "@CO\tcomment2\n"
    );

    // binary merge (operator+): result is a new header, operands stay as they were
    {
        const BamHeader lhs(firstText);
        const BamHeader rhs(secondText);
        const BamHeader combined = lhs + rhs;
        EXPECT_EQ(expectedMerge, combined.ToSam());

        // operands must serialize exactly as before the merge
        EXPECT_EQ(firstText,  lhs.ToSam());
        EXPECT_EQ(secondText, rhs.ToSam());
    }

    // in-place merge (operator+=): left-hand header absorbs the other one
    {
        BamHeader accumulated(firstText);
        accumulated += BamHeader(secondText);
        EXPECT_EQ(expectedMerge, accumulated.ToSam());
    }
}