コード例 #1
0
ファイル: sonLibSortedSetTest.c プロジェクト: sorrywm/sonLib
/*
 * Exercises stSortedSet_equals using the shared fixtures (sortedSet,
 * sortedSet2, input, size) prepared by sonLibSortedSetTestSetup():
 *   - a set compared with itself is equal, both empty and populated;
 *   - sortedSet2 only becomes equal to sortedSet once it holds every input value;
 *   - a set built with plain stSortedSet_construct() never compares equal to
 *     sortedSet (the original author's note: it has a different comparator).
 */
static void test_stSortedSetEquals(CuTest* testCase) {
    sonLibSortedSetTestSetup();

    //Reflexivity, first on the empty fixture and then once it is filled.
    CuAssertTrue(testCase, stSortedSet_equals(sortedSet, sortedSet));
    for (int32_t idx = 0; idx < size; idx++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[idx]));
    }
    CuAssertTrue(testCase, stSortedSet_equals(sortedSet, sortedSet));

    //sortedSet2 starts empty and is then filled with everything except input[0],
    //so it must stay unequal until that last value is added.
    CuAssertTrue(testCase, !stSortedSet_equals(sortedSet, sortedSet2));
    for (int32_t idx = 1; idx < size; idx++) { //input[0] is unique within input
        stSortedSet_insert(sortedSet2, stIntTuple_construct(1, input[idx]));
    }
    CuAssertTrue(testCase, !stSortedSet_equals(sortedSet, sortedSet2));

    stSortedSet_insert(sortedSet2, stIntTuple_construct(1, input[0]));
    CuAssertTrue(testCase, stSortedSet_equals(sortedSet, sortedSet2));

    //A set with a different comparator is expected to be unequal regardless of contents.
    stSortedSet *otherComparatorSet = stSortedSet_construct();
    CuAssertTrue(testCase, !stSortedSet_equals(sortedSet, otherComparatorSet));
    for (int32_t idx = 0; idx < size; idx++) {
        stSortedSet_insert(otherComparatorSet, stIntTuple_construct(1, input[idx]));
    }
    CuAssertTrue(testCase, !stSortedSet_equals(sortedSet, otherComparatorSet));
    stSortedSet_destruct(otherComparatorSet);

    sonLibSortedSetTestTeardown();
}
コード例 #2
0
static stHash *getComponents(stList *filteredEdges) {
    /*
     * Deliberately naive rebuild of the node -> component mapping, kept only
     * as an independent cross-check of the greedy function.
     *
     * Every entry of the externally defined "nodes" list starts in its own
     * singleton set. Each edge in filteredEdges then joins the sets of the
     * nodes stored at tuple positions 1 and 2 of the edge. The returned hash
     * maps each node to the set currently holding it; it is built with NULL
     * key and value destructors.
     */
    stHash *componentOf = stHash_construct3((uint64_t(*)(const void *)) stIntTuple_hashKey,
            (int(*)(const void *, const void *)) stIntTuple_equalsFn, NULL, NULL);

    //Seed the map: one singleton component per node.
    int64_t nodeCount = stList_length(nodes);
    for (int64_t n = 0; n < nodeCount; n++) {
        stIntTuple *seed = stList_get(nodes, n);
        stSortedSet *singleton = stSortedSet_construct();
        stSortedSet_insert(singleton, seed);
        stHash_insert(componentOf, seed, singleton);
    }

    //Join the components of the two endpoints of every edge.
    int64_t edgeCount = stList_length(filteredEdges);
    for (int64_t e = 0; e < edgeCount; e++) {
        stIntTuple *edge = stList_get(filteredEdges, e);
        //Temporary lookup keys for the two endpoints; released at the end of the iteration.
        stIntTuple *keyA = stIntTuple_construct1( stIntTuple_get(edge, 1));
        stIntTuple *keyB = stIntTuple_construct1( stIntTuple_get(edge, 2));
        stSortedSet *setA = stHash_search(componentOf, keyA);
        stSortedSet *setB = stHash_search(componentOf, keyB);
        assert(setA != NULL && setB != NULL);
        if (setA != setB) {
            //Repoint every member of the union at the merged set, then drop the two old sets.
            stSortedSet *merged = stSortedSet_getUnion(setA, setB);
            stSortedSetIterator *memberIt = stSortedSet_getIterator(merged);
            for (stIntTuple *member = stSortedSet_getNext(memberIt); member != NULL;
                    member = stSortedSet_getNext(memberIt)) {
                stHash_insert(componentOf, member, merged);
            }
            stSortedSet_destructIterator(memberIt);
            stSortedSet_destruct(setA);
            stSortedSet_destruct(setB);
        }
        stIntTuple_destruct(keyA);
        stIntTuple_destruct(keyB);
    }
    return componentOf;
}
コード例 #3
0
ファイル: linkage.c プロジェクト: benedictpaten/assemblaLib
stSortedSet *getMetaSequencesForEvents(Flower *flower, stList *eventStrings) {
    /*
     * Creates a new sorted set, lets getMetaSequencesForEventsP populate it
     * for the given flower and event strings, and returns it to the caller.
     * (Original description: gets the haplotype sequences in the set.)
     */
    stSortedSet *collected = stSortedSet_construct();
    getMetaSequencesForEventsP(collected, flower, eventStrings);
    return collected;
}
コード例 #4
0
ファイル: sonLibListTest.c プロジェクト: adderan/sonLib
/*
 * Checks stList_filterToExclude / stList_filterToInclude against the shared
 * fixtures (list, strings) prepared by setup(): with strings[0] and strings[4]
 * in the filter set, the excluding filter must yield strings[1..3] in order
 * and the including filter must yield strings[0] and strings[4] in order.
 */
void test_stList_filter(CuTest *testCase) {
    setup();
    stSortedSet *set = stSortedSet_construct();
    stSortedSet_insert(set, strings[0]);
    stSortedSet_insert(set, strings[4]);
    stList *list2 = stList_filterToExclude(list, set);
    stList *list3 = stList_filterToInclude(list, set);
    CuAssertTrue(testCase,stList_length(list2) == 3);
    CuAssertTrue(testCase,stList_length(list3) == 2);
    CuAssertTrue(testCase,stList_get(list2, 0) == strings[1]);
    CuAssertTrue(testCase,stList_get(list2, 1) == strings[2]);
    CuAssertTrue(testCase,stList_get(list2, 2) == strings[3]);
    CuAssertTrue(testCase,stList_get(list3, 0) == strings[0]);
    CuAssertTrue(testCase,stList_get(list3, 1) == strings[4]);
    //The filter set and both filtered lists were created by this test and were
    //previously leaked; release the containers here. The elements themselves
    //belong to the fixture and are left to teardown().
    stList_destruct(list3);
    stList_destruct(list2);
    stSortedSet_destruct(set);
    teardown();
}
コード例 #5
0
ファイル: sonLibSortedSetTest.c プロジェクト: sorrywm/sonLib
/*
 * Exercises stSortedSet_getDifference using the shared fixtures (sortedSet,
 * sortedSet2, input, size) prepared by sonLibSortedSetTestSetup():
 * empty minus empty, populated minus empty, populated minus an overlapping
 * set, and the exception expected when the comparators differ.
 */
static void test_stSortedSetDifference(CuTest* testCase) {
    sonLibSortedSetTestSetup();

    //Empty minus empty must give an empty set.
    stSortedSet *difference = stSortedSet_getDifference(sortedSet, sortedSet2);
    CuAssertTrue(testCase, stSortedSet_size(difference) == 0);
    stSortedSet_destruct(difference);

    for (int32_t idx = 0; idx < size; idx++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[idx]));
    }

    //Subtracting the empty set must leave the populated set unchanged.
    difference = stSortedSet_getDifference(sortedSet, sortedSet2);
    CuAssertTrue(testCase, stSortedSet_equals(sortedSet, difference));
    stSortedSet_destruct(difference);

    //Overlapping case: sortedSet2 receives the values 0, 1 and 5.
    stSortedSet_insert(sortedSet2, stIntTuple_construct(1, 0));
    stSortedSet_insert(sortedSet2, stIntTuple_construct(1, 1));
    stSortedSet_insert(sortedSet2, stIntTuple_construct(1, 5));

    difference = stSortedSet_getDifference(sortedSet, sortedSet2);
    //The test expects exactly two elements to have been removed ...
    CuAssertTrue(testCase, stSortedSet_size(difference) == stSortedSet_size(sortedSet) - 2);
    CuAssertTrue(testCase, !stSortedSet_equals(sortedSet, difference));
    //... and re-adding 1 and 5 must restore equality with sortedSet.
    stSortedSet_insert(difference, stIntTuple_construct(1, 1));
    stSortedSet_insert(difference, stIntTuple_construct(1, 5));
    CuAssertTrue(testCase, stSortedSet_equals(sortedSet, difference));
    stSortedSet_destruct(difference);

    //Sets with different comparators must raise SORTED_SET_EXCEPTION_ID;
    //reaching the assertion inside the try block means nothing was thrown.
    stSortedSet *otherComparatorSet = stSortedSet_construct();
    stTry {
        stSortedSet_getDifference(sortedSet, otherComparatorSet);
        CuAssertTrue(testCase, 0);
    } stCatch(except) {
        CuAssertTrue(testCase, stExcept_getId(except) == SORTED_SET_EXCEPTION_ID);
    }
    stTryEnd
    stSortedSet_destruct(otherComparatorSet);

    sonLibSortedSetTestTeardown();
}
コード例 #6
0
ファイル: sonLibSortedSetTest.c プロジェクト: sorrywm/sonLib
/*
 * Exercises stSortedSet_getIntersection using the shared fixtures (sortedSet,
 * sortedSet2, input, size) prepared by sonLibSortedSetTestSetup():
 * empty with empty, populated with empty, populated with an overlapping set,
 * and the exception expected when the comparators differ.
 */
static void test_stSortedSetIntersection(CuTest* testCase) {
    sonLibSortedSetTestSetup();
    //Check intersection of empty sets is okay..
    stSortedSet *sortedSet3 = stSortedSet_getIntersection(sortedSet, sortedSet2);
    CuAssertTrue(testCase, stSortedSet_size(sortedSet3) == 0);
    stSortedSet_destruct(sortedSet3);

    int32_t i;
    for(i=0; i<size; i++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[i]));
    }

    //Check intersection of empty and non-empty set is empty.
    sortedSet3 = stSortedSet_getIntersection(sortedSet, sortedSet2);
    CuAssertTrue(testCase, stSortedSet_size(sortedSet3) == 0);
    stSortedSet_destruct(sortedSet3);

    //Check intersection of two non-empty, overlapping sets is correct.
    stSortedSet_insert(sortedSet2, stIntTuple_construct(1, 0));
    stSortedSet_insert(sortedSet2, stIntTuple_construct(1, 1));
    stSortedSet_insert(sortedSet2, stIntTuple_construct(1, 5));

    sortedSet3 = stSortedSet_getIntersection(sortedSet, sortedSet2);
    CuAssertTrue(testCase, stSortedSet_size(sortedSet3) == 2);
    //Temporary keys used only to probe for the two expected members.
    stIntTuple *intTuple = stIntTuple_construct(1, 1);
    CuAssertTrue(testCase, stSortedSet_search(sortedSet3, intTuple) != NULL);
    stIntTuple_destruct(intTuple);
    intTuple = stIntTuple_construct(1, 5);
    CuAssertTrue(testCase, stSortedSet_search(sortedSet3, intTuple) != NULL);
    stIntTuple_destruct(intTuple);
    stSortedSet_destruct(sortedSet3);

    //Check we get an exception with sorted sets with different comparators.
    stSortedSet *sortedSet4 = stSortedSet_construct();
    stTry {
        stSortedSet_getIntersection(sortedSet, sortedSet4);
        //Reaching this line means no exception was thrown; fail explicitly so
        //the test cannot pass silently (mirrors the difference test).
        CuAssertTrue(testCase, 0);
    } stCatch(except) {
        CuAssertTrue(testCase, stExcept_getId(except) == SORTED_SET_EXCEPTION_ID);
    }
    stTryEnd
    stSortedSet_destruct(sortedSet4);

    sonLibSortedSetTestTeardown();
}
コード例 #7
0
stList *getContigPaths(Flower *flower, const char *eventString, stList *eventStrings) {
    /*
     * Collects the maximal haplotype paths for eventString via
     * getMaximalHaplotypePathsP and returns them as a list of lists (the outer
     * list is built with stList_destruct as its element destructor).
     * Everything after the collection step is debug logging and assert-based
     * consistency checking; the asserts compile away when NDEBUG is defined.
     */
    stList *paths = stList_construct3(0,
            (void(*)(void *)) stList_destruct);
    stSortedSet *segmentSet = stSortedSet_construct();
    getMaximalHaplotypePathsP(flower, paths, segmentSet, eventString, eventStrings);

    //Debug checks over the collected paths.
    st_logDebug("We have %" PRIi64 " maximal haplotype paths\n", stList_length(paths));
    getMaximalHaplotypePathsCheck(flower, segmentSet, eventString, eventStrings);
    for (int64_t pathIndex = 0; pathIndex < stList_length(paths); pathIndex++) {
        stList *path = stList_get(paths, pathIndex);
        int64_t pathLength = stList_length(path);
        st_logDebug("We have a maximal haplotype path with length %" PRIi64 "\n",
                pathLength);
        assert(pathLength > 0);

        //Where an outer end of the path has an adjacent segment, that
        //adjacency must not be a true adjacency.
        Segment *firstSegment = stList_get(path, 0);
        Segment *lastSegment = stList_get(path, pathLength - 1);
        if (getAdjacentCapsSegment(segment_get5Cap(firstSegment)) != NULL) {
            assert(!trueAdjacency(segment_get5Cap(firstSegment), eventStrings));
        }
        if (getAdjacentCapsSegment(segment_get3Cap(lastSegment)) != NULL) {
            assert(!trueAdjacency(segment_get3Cap(lastSegment), eventStrings));
        }

        //Every consecutive pair inside the path must be joined by a true
        //adjacency, belong to eventString and have caps in the given events.
        for (int64_t next = 1; next < pathLength; next++) {
            Segment *upstream = stList_get(path, next - 1);
            Segment *downstream = stList_get(path, next);
            assert(trueAdjacency(segment_get3Cap(upstream), eventStrings));
            assert(trueAdjacency(segment_get5Cap(downstream), eventStrings));
            assert(cap_getAdjacency(getTerminalCap(segment_get3Cap(upstream)))
                    == getTerminalCap(segment_get5Cap(downstream)));
            assert(strcmp(event_getHeader(segment_getEvent(upstream)),
                    eventString) == 0);
            assert(strcmp(event_getHeader(segment_getEvent(downstream)),
                    eventString) == 0);
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(upstream)), eventStrings));
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(downstream)), eventStrings));
        }
    }

    stSortedSet_destruct(segmentSet);

    return paths;
}
stList *getComponents(stList *edges) {
    /*
     * Splits the edges into connected components. The result is a list of
     * lists (outer list built with stList_destruct as element destructor),
     * one inner list per component, produced from the sorted set that
     * getComponentsP fills. Per the original description each edge ends up in
     * exactly one component and multi-graphs are allowed.
     */
    stHash *nodesToEdges = getNodesToEdgesHash(edges);

    //Greedy sweep: take any node still present in the hash and grow its component.
    stList *components =
            stList_construct3(0, (void(*)(void *)) stList_destruct);
    stList *pendingNodes = stHash_getKeys(nodesToEdges);
    while (stList_length(pendingNodes) > 0) {
        stIntTuple *seedNode = stList_pop(pendingNodes);
        stList *incidentEdges = stHash_search(nodesToEdges, seedNode);
        if (incidentEdges != NULL) { //Node not yet absorbed by an earlier component
            stSortedSet *component = stSortedSet_construct();
            //Take the seed out of the hash before expanding from its edges.
            stHash_remove(nodesToEdges, seedNode);
            int64_t incidentCount = stList_length(incidentEdges);
            for (int64_t e = 0; e < incidentCount; e++) {
                stIntTuple *edge = stList_get(incidentEdges, e);
                //Expand from both endpoints (tuple positions 0 and 1) of the edge.
                getComponentsP(nodesToEdges, stIntTuple_get(edge, 0),
                        component);
                getComponentsP(nodesToEdges, stIntTuple_get(edge, 1),
                        component);
            }
            stList_append(components, stSortedSet_getList(component));
            //Cleanup
            stSortedSet_destruct(component);
            stList_destruct(incidentEdges);
        }
        stIntTuple_destruct(seedNode);
    }
    //Every node must have been consumed by some component.
    assert(stHash_size(nodesToEdges) == 0);
    stHash_destruct(nodesToEdges);
    stList_destruct(pendingNodes);

    return components;
}
コード例 #9
0
/*
 * Prints a one-line summary of statistics for a single flower.
 *
 * Command line:
 *   argv[1]  log level string, passed to st_setLogLevelFromString
 *   argv[2]  database configuration string, passed to
 *            stKVDatabaseConf_constructFromString
 *   argv[3]  flower name, parsed with cactusMisc_stringToName
 *
 * Returns 0 after printing the statistics, or 1 (with a usage message on
 * stderr) when fewer than three arguments are supplied.
 */
int main(int argc, char *argv[]) {
    //argv[1..3] are all dereferenced below; bail out instead of reading past the argument list.
    if (argc < 4) {
        fprintf(stderr, "Usage: %s logLevel kvDatabaseConfString flowerName\n",
                argc > 0 ? argv[0] : "program");
        return 1;
    }

    st_setLogLevelFromString(argv[1]);
    st_logDebug("Set up logging\n");

    stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(argv[2]);
    CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0);
    stKVDatabaseConf_destruct(kvDatabaseConf);
    st_logDebug("Set up the flower disk\n");

    Name flowerName = cactusMisc_stringToName(argv[3]);
    Flower *flower = cactusDisk_getFlower(cactusDisk, flowerName);

    //Counts that the flower reports directly.
    int64_t totalBases = flower_getTotalBaseLength(flower);
    int64_t totalEnds = flower_getEndNumber(flower);
    int64_t totalFreeEnds = flower_getFreeStubEndNumber(flower);
    int64_t totalAttachedEnds = flower_getAttachedStubEndNumber(flower);
    int64_t totalCaps = flower_getCapNumber(flower);
    int64_t totalBlocks = flower_getBlockNumber(flower);
    int64_t totalGroups = flower_getGroupNumber(flower);
    int64_t totalChains = flower_getChainNumber(flower);
    //Counts accumulated by the traversals below.
    int64_t totalLinkGroups = 0;
    int64_t maxEndDegree = 0;
    int64_t maxAdjacencyLength = 0;
    int64_t totalEdges = 0;

    Flower_EndIterator *endIt = flower_getEndIterator(flower);
    End *end;
    while((end = flower_getNextEnd(endIt)) != NULL) {
        assert(end_getOrientation(end));
        if(end_getInstanceNumber(end) > maxEndDegree) {
            maxEndDegree = end_getInstanceNumber(end);
        }
        //Distinct (positively oriented) ends adjacent to this end.
        stSortedSet *ends = stSortedSet_construct();
        End_InstanceIterator *capIt = end_getInstanceIterator(end);
        Cap *cap;
        while((cap = end_getNext(capIt)) != NULL) {
            if(cap_getSequence(cap) != NULL) {
                Cap *adjacentCap = cap_getAdjacency(cap);
                assert(adjacentCap != NULL);
                End *adjacentEnd = end_getPositiveOrientation(cap_getEnd(adjacentCap));
                stSortedSet_insert(ends, adjacentEnd);
                //Absolute distance between the coordinates of the two adjacent caps.
                int64_t adjacencyLength = cap_getCoordinate(cap) - cap_getCoordinate(adjacentCap);
                if(adjacencyLength < 0) {
                    adjacencyLength *= -1;
                }
                assert(adjacencyLength >= 1);
                if(adjacencyLength >= maxAdjacencyLength) {
                    maxAdjacencyLength = adjacencyLength;
                }
            }
        }
        end_destructInstanceIterator(capIt);
        totalEdges += stSortedSet_size(ends);
        if(stSortedSet_search(ends, end) != NULL) { //This ensures we count self edges twice, so that the division works.
            totalEdges += 1;
        }
        stSortedSet_destruct(ends);
    }
    //Every edge has been counted from both of its ends, so the total must be even.
    assert(totalEdges % 2 == 0);
    flower_destructEndIterator(endIt);

    //Count the groups that have a link.
    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    Group *group;
    while((group = flower_getNextGroup(groupIt)) != NULL) {
        if(group_getLink(group) != NULL) {
            totalLinkGroups++;
        }
    }
    flower_destructGroupIterator(groupIt);

    printf("flower name: %" PRIi64 " total bases: %" PRIi64 " total-ends: %" PRIi64 " total-caps: %" PRIi64 " max-end-degree: %" PRIi64 " max-adjacency-length: %" PRIi64 " total-blocks: %" PRIi64 " total-groups: %" PRIi64 " total-edges: %" PRIi64 " total-free-ends: %" PRIi64 " total-attached-ends: %" PRIi64 " total-chains: %" PRIi64 " total-link groups: %" PRIi64 "\n",
            flower_getName(flower), totalBases, totalEnds, totalCaps, maxEndDegree, maxAdjacencyLength, totalBlocks, totalGroups, totalEdges/2, totalFreeEnds, totalAttachedEnds, totalChains, totalLinkGroups);

    //NOTE(review): cactusDisk is never released before exit - confirm whether that is intentional.
    return 0;
}
コード例 #10
0
/*
 * Groups haplotype paths into scaffold "buckets" and tracks a combined length
 * per path.
 *
 * Every path in haplotypePaths first gets its own singleton bucket (a sorted
 * set) registered in haplotypePathToScaffoldPathHash. Then, for every path
 * whose 5' end cap code is SCAFFOLD_GAP or AMBIGUITY_GAP, the neighbouring
 * haplotype path is located by walking adjacent segments, the two buckets are
 * replaced by their union, and the length entry of every member of the union
 * is replaced by the sum of the two lengths read before the merge.
 *
 * Returns the path -> length hash obtained from
 * buildContigPathToContigPathLengthHash, as updated above; its values are read
 * here as stIntTuples with the length at position 0. The temporary
 * segment -> path hash is destructed before returning.
 *
 * Most consistency checks are asserts and disappear when NDEBUG is defined.
 */
static stHash *getScaffoldPathsP(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash,
        stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) {
    stHash *haplotypeToMaximalHaplotypeLengthHash = buildContigPathToContigPathLengthHash(haplotypePaths);
    stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths);
    //Start with one singleton bucket per haplotype path.
    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stSortedSet *bucket = stSortedSet_construct();
        stHash_insert(haplotypePathToScaffoldPathHash, stList_get(haplotypePaths, i), bucket);
        stSortedSet_insert(bucket, stList_get(haplotypePaths, i));
    }
    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stList *haplotypePath = stList_get(haplotypePaths, i);
        assert(stList_length(haplotypePath) > 0);
        //Pick the positive-strand segment at the 5' end of the path: the first
        //segment, or the reverse of the last one when the first is on the negative strand.
        Segment *_5Segment = stList_get(haplotypePath, 0);
        if (!segment_getStrand(_5Segment)) {
            _5Segment = segment_getReverse(stList_get(haplotypePath, stList_length(haplotypePath) - 1));
        }
        assert(segment_getStrand(_5Segment));
        if (getAdjacentCapsSegment(segment_get5Cap(_5Segment)) != NULL) {
            assert(!trueAdjacency(segment_get5Cap(_5Segment), haplotypeEventStrings));
        }
        //insertLength, deleteLength and otherCap are out-parameters of getCapCode; they are not read here.
        int64_t insertLength;
        int64_t deleteLength;
        Cap *otherCap;
        enum CapCode _5CapCode = getCapCode(segment_get5Cap(_5Segment), &otherCap, haplotypeEventStrings, contaminationEventStrings, &insertLength, &deleteLength, capCodeParameters);
        if (_5CapCode == SCAFFOLD_GAP || _5CapCode == AMBIGUITY_GAP) {
            assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath) != NULL);
            //j: length currently recorded for this path.
            int64_t j = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath), 0);
            Segment *adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(_5Segment));
            assert(adjacentSegment != NULL);
            //Keep stepping to the segment adjacent to the 5' cap until reaching one whose 5' end has a cap in the haplotype events.
            while (!hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)) { //is not a haplotype end
                adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(adjacentSegment));
                assert(adjacentSegment != NULL);
            }
            assert(adjacentSegment != NULL);
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)); //is a haplotype end
            //Look the segment up in either orientation to find the path that contains it.
            stList *adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, adjacentSegment);
            if (adjacentHaplotypePath == NULL) {
                adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, segment_getReverse(
                        adjacentSegment));
            }
            assert(adjacentHaplotypePath != NULL);
            assert(adjacentHaplotypePath != haplotypePath);
            assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath) != NULL);
            //k: length currently recorded for the adjacent path.
            int64_t k = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath), 0);

            //Now merge the buckets and make new int tuples..
            stSortedSet *bucket1 = stHash_search(haplotypePathToScaffoldPathHash, haplotypePath);
            stSortedSet *bucket2 = stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath);
            assert(bucket1 != NULL);
            assert(bucket2 != NULL);
            assert(bucket1 != bucket2);
            stSortedSet *bucket3 = stSortedSet_getUnion(bucket1, bucket2);
            stSortedSetIterator *bucketIt = stSortedSet_getIterator(bucket3);
            stList *l;
            while ((l = stSortedSet_getNext(bucketIt)) != NULL) {
                //Do the bucket first
                assert(stHash_search(haplotypePathToScaffoldPathHash, l) == bucket1 || stHash_search(haplotypePathToScaffoldPathHash, l) == bucket2);
                stHash_remove(haplotypePathToScaffoldPathHash, l);
                stHash_insert(haplotypePathToScaffoldPathHash, l, bucket3);
                //Now the length
                //Replace the member's length tuple with a fresh one holding j + k and free the old tuple.
                stIntTuple *m = stHash_remove(haplotypeToMaximalHaplotypeLengthHash, l);
                assert(m != NULL);
                assert(stIntTuple_get(m, 0) == j || stIntTuple_get(m, 0) == k);
                stHash_insert(haplotypeToMaximalHaplotypeLengthHash, l, stIntTuple_construct1( j + k));
                stIntTuple_destruct(m);
            }
            assert(stHash_search(haplotypePathToScaffoldPathHash, haplotypePath) == bucket3);
            assert(stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath) == bucket3);
            stSortedSet_destructIterator(bucketIt);
            //NOTE(review): bucket1 and bucket2 are no longer referenced by the hash after the merge but are not destructed here - confirm who owns them.
        }
    }
    stHash_destruct(segmentToMaximalHaplotypePathHash);
    return haplotypeToMaximalHaplotypeLengthHash;
}