Example #1
0
// Pulls features one at a time from stack_stream via
// StackReader::getNextFeature and adds each one to index.
// Stops as soon as the reader reports that no further feature was read.
void indexJunctionsFromStack(istream& stack_stream, JunctionIndex& index)
{
    // A single Feature object is reused as the read buffer for every record.
    for (Feature junction; StackReader::getNextFeature(stack_stream, junction); )
    {
        index.add(junction);
    }
}
Example #2
0
// Parses gff_stream with getGenesAndTranscriptsFromGFF, asks every resulting
// transcript for its splice junctions, and adds the collected junctions
// to index.
void indexJunctionsFromGFF(istream& gff_stream, JunctionIndex& index)
{
    // Only the transcripts are consumed below; the other two vectors exist
    // because the parsing helper fills all three.
    vector<Feature> all_features;
    vector<Feature> gene_features;
    vector<Feature> transcript_features;
    getGenesAndTranscriptsFromGFF(gff_stream, all_features, gene_features,
                                  transcript_features);

    // Feature types passed to spliceJunctions() as the exon types.
    const char* const exon_type_names[] = { "exon", "pseudogenic_exon" };
    vector<string> exon_types(exon_type_names, exon_type_names + 2);

    // Every transcript is handed the same output vector.
    vector<Feature> junctions;
    const vector<Feature>::size_type transcript_count = transcript_features.size();
    for (vector<Feature>::size_type i = 0; i < transcript_count; ++i)
    {
        transcript_features[i].spliceJunctions(junctions, exon_types);
    }

    // TODO optimize to add directly to index
    index.add(junctions.begin(), junctions.end());
}
Example #3
0
TEST(JunctionIndexTest, unique)
{
    // JunctionIndex::unique() collects the distinct junctions held by the
    // index; uniqueCount() reports how many there are.
    JunctionIndex index;
    Feature junction;

    // First junction: Chr1 10-20, source "A".
    junction.seqid = "Chr1";
    junction.start = 10;
    junction.end = 20;
    junction.source = "A";
    index.add(junction);

    // Same seqid and coordinates again, only the source differs:
    // this is the duplicate.
    junction.source = "B";
    index.add(junction);

    // Different start on the same seqid.
    junction.start = 15;
    junction.source = "C";
    index.add(junction);

    // Same coordinates as "C" but on another seqid.
    junction.seqid = "Chr2";
    junction.source = "D";
    index.add(junction);

    std::vector<Feature> unique_junctions;
    index.unique(unique_junctions);

    // Compare by source label; "B" must be absent.
    std::vector<std::string> sources;
    std::vector<Feature>::iterator it = unique_junctions.begin();
    while (it != unique_junctions.end())
    {
        sources.push_back(it->source);
        ++it;
    }
    EXPECT_THAT(sources, WhenSorted(ElementsAre("A", "C", "D")));

    // uniqueCount() must agree with the number of junctions returned above
    EXPECT_EQ(3, index.uniqueCount());
}
Example #4
0
TEST(JunctionIndexTest, overlappingFeature)
{
    JunctionIndex index;

    // The first two junctions lie inside the query range on the query's
    // seqid, so both are expected in the result.
    Feature inside_first;
    inside_first.seqid = "one";
    inside_first.start = 20;
    inside_first.end = 40;
    inside_first.source = "A";
    index.add(inside_first);

    Feature inside_second;
    inside_second.seqid = "one";
    inside_second.start = 60;
    inside_second.end = 80;
    inside_second.source = "B";
    index.add(inside_second);

    // This junction starts inside the query range but ends beyond it.
    // Only junctions that fall entirely within the query range are valid,
    // so it is not expected in the result.
    Feature straddling;
    straddling.seqid = "one";
    straddling.start = 100;
    straddling.end = 150;
    straddling.source = "C";
    index.add(straddling);

    // Same seqid and coordinates as the first junction.
    // JunctionIndex only returns unique junctions, so this must not show
    // up as a second entry.
    Feature duplicate;
    duplicate.seqid = "one";
    duplicate.start = 20;
    duplicate.end = 40;
    index.add(duplicate);

    // Coordinates are inside the query range, but the seqid differs from
    // the query's, so this is not an overlap.
    Feature other_seqid;
    other_seqid.seqid = "two";
    other_seqid.start = 20;
    other_seqid.end = 40;
    index.add(other_seqid);

    Feature query;
    query.seqid = "one";
    query.start = 10;
    query.end = 110;

    std::vector<Feature> hits;
    index.overlappingFeature(query, hits);

    // Identify the returned junctions by their source labels.
    std::vector<std::string> sources;
    std::vector<Feature>::iterator hit = hits.begin();
    while (hit != hits.end())
    {
        sources.push_back(hit->source);
        ++hit;
    }

    EXPECT_THAT(sources, WhenSorted(ElementsAre("A", "B")));
}