// Round-trips a BamHeader through a raw htslib header struct (bam_hdr_t):
// builds a header, serializes it to SAM text, parses that text with
// sam_hdr_parse, copies the text into the raw struct, and then rebuilds a
// BamHeader from the raw text. The rebuilt header must match the original's
// @HD fields and serialize to the expected SAM text.
TEST(BamHeaderTest, ExtractFromRawDataOk)
{
    ReadGroupInfo rg1("rg1");
    rg1.Sample("control");
    ReadGroupInfo rg2("rg2");
    rg2.Sample("condition1");
    ReadGroupInfo rg3("rg3");
    rg3.Sample("condition1");

    SequenceInfo seq1("chr1");
    seq1.Length("2038").Species("chocobo");
    SequenceInfo seq2("chr2");
    seq2.Length("3042").Species("chocobo");

    ProgramInfo prog1("_foo_");
    prog1.Name("ide");

    BamHeader header;
    header.Version("1.1")
          .SortOrder("queryname")
          .PacBioBamVersion("3.0.1")
          .AddReadGroup(rg1)
          .AddReadGroup(rg2)
          .AddReadGroup(rg3)
          .AddSequence(seq1)
          .AddSequence(seq2)
          .AddProgram(prog1)
          .AddComment("ipsum and so on")
          .AddComment("citation needed");

    // Note: the expected @RG lines carry PL/DS/PM fields that are not set
    // explicitly above; only the sample name is.
    const string expectedText = "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
                                "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
                                "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
                                "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\tPM:SEQUEL\n"
                                "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
                                "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
                                "@PG\tID:_foo_\tPN:ide\n"
                                "@CO\tipsum and so on\n"
                                "@CO\tcitation needed\n";

    string text = header.ToSam();
    PBBAM_SHARED_PTR<bam_hdr_t> rawData(sam_hdr_parse(text.size(), text.c_str()), tests::BamHdrDeleter());

    // Fail the test cleanly (rather than dereferencing a null pointer) if the
    // SAM text could not be parsed.
    ASSERT_TRUE(rawData.get() != NULL);

    rawData->ignore_sam_err = 0;
    rawData->cigar_tab = NULL;
    rawData->l_text = text.size();

    // Zero-filled buffer one byte larger than the text, so the copy below
    // stays NUL-terminated.
    // NOTE(review): ownership of this buffer presumably passes to
    // tests::BamHdrDeleter - confirm it releases rawData->text.
    rawData->text = static_cast<char*>(calloc(rawData->l_text + 1, 1));
    ASSERT_TRUE(rawData->text != NULL);
    memcpy(rawData->text, text.c_str(), rawData->l_text);

    const BamHeader newHeader = BamHeader(string(rawData->text, rawData->l_text));

    EXPECT_EQ(header.Version(),          newHeader.Version());
    EXPECT_EQ(header.SortOrder(),        newHeader.SortOrder());
    EXPECT_EQ(header.PacBioBamVersion(), newHeader.PacBioBamVersion());

    text = newHeader.ToSam();
    EXPECT_EQ(expectedText, text);
}
// Example #2
// 0
// Builds the BAM header for the output file.
//
// The header gets a single program entry describing this invocation (id,
// name, reconstructed command line, description, version) and one read group
// for every read group found in the headers of the given input files.
//
// parser  option parser; supplies the program name and is used to report an
//         input read group whose read type is not "SUBREAD"
// argc    argument count as passed to main
// argv    argument vector as passed to main; argv[1..argc) are echoed into
//         the recorded command line
// files   paths of the input BAM files whose read groups are carried over
BamHeader PrepareHeader(const OptionParser& parser, int argc, char** argv,
                        const vector<string>& files)
{
    // Describe this program run.
    ProgramInfo ccsProgram(parser.prog() + "-" + CCS_VERSION);
    ccsProgram.Name(parser.prog());

    // Everything after argv[0], joined with single spaces.
    const vector<string> cliArgs(argv + 1, argv + argc);
    ccsProgram.CommandLine(parser.prog() + " " + boost::algorithm::join(cliArgs, " "));
    ccsProgram.Description(DESCRIPTION);
    ccsProgram.Version(CCS_VERSION);

    BamHeader outHeader;
    outHeader.PacBioBamVersion("3.0.1");
    outHeader.SortOrder("unknown");
    outHeader.Version("1.5");
    outHeader.AddProgram(ccsProgram);

    for (const auto& path : files) {
        BamFile inputBam(path);
        const auto inputReadGroups = inputBam.Header().ReadGroups();

        for (const auto& inputRg : inputReadGroups) {
            if (inputRg.ReadType() != "SUBREAD") {
                parser.error("invalid input file, READTYPE must be SUBREAD");
            }

            // New read group keyed on the input movie name; kit, basecaller
            // and frame-rate details are copied from the input read group.
            ReadGroupInfo ccsRg(inputRg.MovieName(), "CCS");
            ccsRg.BindingKit(inputRg.BindingKit());
            ccsRg.SequencingKit(inputRg.SequencingKit());
            ccsRg.BasecallerVersion(inputRg.BasecallerVersion());
            ccsRg.FrameRateHz(inputRg.FrameRateHz());

            outHeader.AddReadGroup(ccsRg);
        }
    }

    return outHeader;
}
// Verifies that a BamHeader constructed from SAM header text exposes the
// @HD fields, read groups, sequences, programs and comments contained in
// that text.
TEST(BamHeaderTest, DecodeTest)
{
    const string text = "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
                        "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
                        "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
                        "@RG\tID:rg1\tSM:control\n"
                        "@RG\tID:rg2\tSM:condition1\n"
                        "@RG\tID:rg3\tSM:condition1\n"
                        "@PG\tID:_foo_\tPN:ide\n"
                        "@CO\tipsum and so on\n"
                        "@CO\tcitation needed\n";

    BamHeader header = BamHeader(text);

    // @HD line
    EXPECT_EQ(string("1.1"),       header.Version());
    EXPECT_EQ(string("queryname"), header.SortOrder());
    EXPECT_EQ(string("3.0.1"),     header.PacBioBamVersion());

    // @RG lines
    // Unsigned literals are used for every size() comparison so the
    // assertion does not mix signed and unsigned operands.
    EXPECT_EQ(3u, header.ReadGroups().size());
    EXPECT_TRUE(header.HasReadGroup("rg1"));
    EXPECT_TRUE(header.HasReadGroup("rg2"));
    EXPECT_TRUE(header.HasReadGroup("rg3"));

    EXPECT_EQ(string("control"),    header.ReadGroup("rg1").Sample());
    EXPECT_EQ(string("condition1"), header.ReadGroup("rg2").Sample());
    EXPECT_EQ(string("condition1"), header.ReadGroup("rg3").Sample());

    // @SQ lines
    EXPECT_EQ(2u, header.Sequences().size());
    EXPECT_TRUE(header.HasSequence("chr1"));
    EXPECT_TRUE(header.HasSequence("chr2"));
    EXPECT_EQ(string("chocobo"), header.Sequence("chr1").Species());
    EXPECT_EQ(string("chocobo"), header.Sequence("chr2").Species());
    EXPECT_EQ(string("2038"), header.Sequence("chr1").Length());
    EXPECT_EQ(string("3042"), header.Sequence("chr2").Length());

    // @PG line
    EXPECT_EQ(1u, header.Programs().size());
    EXPECT_TRUE(header.HasProgram("_foo_"));
    EXPECT_EQ(string("ide"), header.Program("_foo_").Name());

    // @CO lines
    // Hard assertion: the indexed lookups below are only meaningful when
    // both comments are present.
    ASSERT_EQ(2u, header.Comments().size());
    EXPECT_EQ(string("ipsum and so on"), header.Comments().at(0));
    EXPECT_EQ(string("citation needed"), header.Comments().at(1));
}
// Example #4
// 0
/// Builds the output BAM header from the single input read group.
///
/// The header gets one program entry describing this invocation and one read
/// group that is a copy of the input read group, re-labelled with a randomly
/// generated movie name of the form m<5 digits>_<6 digits>_<6 digits>.
///
/// \param cmdLine          command-line arguments, appended to APPNAME in the
///                         recorded program command line
/// \param inputReadgroups  read groups of the input; exactly one is required
///
/// \returns the assembled header
///
/// Terminates the process with EXIT_FAILURE (after printing to stderr) when
/// \p inputReadgroups does not contain exactly one read group.
static BamHeader PrepareHeader(const std::string& cmdLine,
                               const std::vector<ReadGroupInfo>& inputReadgroups)
{
    // Validate up front: front() is called on the vector below, which is
    // undefined behaviour on an empty container.
    if (inputReadgroups.empty()) {
        std::cerr << APPNAME << " requires one readgroup in the input bam, but none was found\n";
        exit(EXIT_FAILURE);
    }

    if (inputReadgroups.size() > 1) {
        std::cerr << APPNAME << " can currently only handle one readgroup per input bam\n";
        exit(EXIT_FAILURE);
    }

    ProgramInfo program{APPNAME + "-" + PacBio::UnanimityVersion()};
    program.Name(APPNAME)
        .CommandLine(APPNAME + " " + cmdLine)
        .Description(DESCRIPTION)
        .Version(PacBio::UnanimityVersion());

    std::random_device rd;
    std::default_random_engine rng{rd()};

    // Random movie name: 'm' + 5-digit number + '_' + 6-digit + '_' + 6-digit.
    std::ostringstream movieName;
    movieName << 'm' << std::uniform_int_distribution<uint32_t>{10000, 99999}(rng) << '_'
              << std::uniform_int_distribution<uint32_t>{100000, 999999}(rng) << '_'
              << std::uniform_int_distribution<uint32_t>{100000, 999999}(rng);

    // reuse input readgroups for platform information
    ReadGroupInfo newRg{inputReadgroups.front()};
    newRg.MovieName(movieName.str())
        .ReadType("SUBREAD")
        .PlatformModel(PlatformModelType::SEQUEL)
        .IpdCodec(FrameCodec::V1)
        .PulseWidthCodec(FrameCodec::V1)
        .Id(movieName.str(), "SUBREAD");

    BamHeader header;
    header.PacBioBamVersion("3.0.1")
        .SortOrder("unknown")
        .Version("1.5")
        .AddProgram(program)
        .ReadGroups(std::vector<ReadGroupInfo>{newRg});

    return header;
}