/*
 * Recursive depth first search step used to decide whether an acyclic ordering exists.
 *
 * "started" collects every sequence position whose exploration has begun and "done" those
 * whose exploration has finished. Reaching a position that is started but not yet done is
 * reported as a cycle.
 *
 * Returns 1 if a cycle was detected while exploring from seqPos, otherwise 0.
 */
static int64_t dfs(stHash *adjacencyList, stIntTuple *seqPos,
                   stSortedSet *started, stSortedSet *done) {
    if (stSortedSet_search(started, seqPos) != NULL) {
        //Seen before: a cycle only if that earlier visit has not completed yet.
        return stSortedSet_search(done, seqPos) == NULL ? 1 : 0;
    }
    stSortedSet_insert(started, seqPos);

    int64_t foundCycle = 0;

    //The successor is the next position along the same sequence.
    stIntTuple *successor = stIntTuple_construct2(stIntTuple_get(seqPos, 0), stIntTuple_get(seqPos, 1) + 1);
    stSortedSet *successorColumn = stHash_search(adjacencyList, successor);
    if (successorColumn != NULL) { //Only positions present in the adjacency list are explored
        assert(stSortedSet_search(successorColumn, successor) != NULL);
        stSortedSetIterator *columnIt = stSortedSet_getIterator(successorColumn);
        stIntTuple *member;
        while ((member = stSortedSet_getNext(columnIt)) != NULL) {
            //Once a cycle is known the remaining members are no longer explored.
            if (!foundCycle && dfs(adjacencyList, member, started, done)) {
                foundCycle = 1;
            }
        }
        stSortedSet_destructIterator(columnIt);
    }
    stIntTuple_destruct(successor);
    stSortedSet_insert(done, seqPos);
    return foundCycle;
}
static void getMaximalHaplotypePathsCheck(Flower *flower,
        stSortedSet *segmentSet, const char *eventString, stList *eventStrings) {
    /*
     * Debug check, applied to this flower and recursively to every nested flower:
     * each segment whose event header equals eventString and whose 5' cap end has a cap
     * in eventStrings must be present in segmentSet in one orientation or the other.
     */
    Segment *segment;
    Flower_SegmentIterator *segmentIt = flower_getSegmentIterator(flower);
    while ((segment = flower_getNextSegment(segmentIt)) != NULL) {
        if (strcmp(event_getHeader(segment_getEvent(segment)), eventString) == 0
                && hasCapInEvents(cap_getEnd(segment_get5Cap(segment)), eventStrings)) {
            assert(stSortedSet_search(segmentSet, segment) != NULL
                    || stSortedSet_search(segmentSet, segment_getReverse(segment)) != NULL);
        }
    }
    flower_destructSegmentIterator(segmentIt);

    //Apply the same check to the nested flowers.
    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    for (Group *group = flower_getNextGroup(groupIt); group != NULL;
            group = flower_getNextGroup(groupIt)) {
        if (group_getNestedFlower(group) == NULL) {
            continue;
        }
        getMaximalHaplotypePathsCheck(group_getNestedFlower(group),
                segmentSet, eventString, eventStrings);
    }
    flower_destructGroupIterator(groupIt);
}
Exemple #3
0
static void test_stSortedSet_searchGreaterThan(CuTest* testCase) {
    sonLibSortedSetTestSetup();

    for(int32_t i=0; i<size; i++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[i]));
    }
    //static int32_t sortedInput[] = { -10, -1, 1, 3, 5, 10, 12 };
    CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet,
                 stIntTuple_construct(1, -11)) == stSortedSet_search(sortedSet, stIntTuple_construct(1, -10)));
    CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet,
                 stIntTuple_construct(1, -10)) == stSortedSet_search(sortedSet, stIntTuple_construct(1, -1)));
    CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet,
                 stIntTuple_construct(1, -5)) == stSortedSet_search(sortedSet, stIntTuple_construct(1, -1)));
    CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet,
                 stIntTuple_construct(1, 1)) == stSortedSet_search(sortedSet, stIntTuple_construct(1, 3)));
    CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet,
                 stIntTuple_construct(1, 13)) == NULL);
    CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet,
                 stIntTuple_construct(1, 12)) == NULL);

    for(int32_t i=0; i<100; i++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, st_randomInt(-1000, 1000)));
    }
    stList *list = stSortedSet_getList(sortedSet);
    for(int32_t i=1; i<stList_length(list); i++) {
        stIntTuple *p = stList_get(list, i-1);
        stIntTuple *j = stList_get(list, i);
        stIntTuple *k = stIntTuple_construct(1, st_randomInt(stIntTuple_getPosition(p, 0), stIntTuple_getPosition(j, 0)));
        CuAssertTrue(testCase, stSortedSet_searchGreaterThan(sortedSet, k) == j);
        stIntTuple_destruct(k);
    }
    stList_destruct(list);

    sonLibSortedSetTestTeardown();
}
static void getMaximalHaplotypePathsP2(Segment *segment,
        stList *maximalHaplotypePath, stSortedSet *segmentSet, stList *eventStrings) {
    /*
     * Follows supported adjacencies from the 5' cap, segment by segment, until no further
     * segment can be reached; from that segment getMaximalHaplotypePathsP3 is started to
     * record the maximal haplotype path.
     */
    assert(hasCapInEvents(cap_getEnd(segment_get3Cap(segment)), eventStrings));
    Cap *fivePrimeCap = segment_get5Cap(segment);
    if (trueAdjacency(fivePrimeCap, eventStrings)) { //The adjacency is supported by a haplotype path
        Segment *adjacentSegment = getAdjacentCapsSegment(fivePrimeCap);
        assert(segment != adjacentSegment);
        assert(segment_getReverse(segment) != adjacentSegment);
        if (adjacentSegment != NULL) {
            //Keep walking: the adjacent segment must not have been recorded yet.
            assert(stSortedSet_search(segmentSet, adjacentSegment) == NULL);
            assert(stSortedSet_search(segmentSet, segment_getReverse(adjacentSegment)) == NULL);
            assert(hasCapInEvents(cap_getEnd(segment_get3Cap(adjacentSegment)), eventStrings));
            getMaximalHaplotypePathsP2(adjacentSegment, maximalHaplotypePath,
                    segmentSet, eventStrings);
            return;
        }
    }
    //Either the adjacency is unsupported or it leads to no segment: start recording here.
    getMaximalHaplotypePathsP3(segment, maximalHaplotypePath, segmentSet, eventStrings);
}
static void getMaximalHaplotypePathsP3(Segment *segment,
        stList *maximalHaplotypePath, stSortedSet *segmentSet, stList *eventStrings) {
    /*
     * Appends segment to the path and marks it in segmentSet, then keeps moving to the
     * segment adjacent to the 3' cap for as long as that adjacency is supported and
     * leads to a segment.
     */
    Segment *current = segment;
    do {
        stList_append(maximalHaplotypePath, current);
        assert(stSortedSet_search(segmentSet, current) == NULL);
        assert(stSortedSet_search(segmentSet, segment_getReverse(current)) == NULL);
        stSortedSet_insert(segmentSet, current);

        Cap *threePrimeCap = segment_get3Cap(current);
        if (!trueAdjacency(threePrimeCap, eventStrings)) {
            break; //The path ends at an unsupported adjacency
        }
        current = getAdjacentCapsSegment(threePrimeCap);
    } while (current != NULL);
}
static void makeMatchingPerfect(stList *chosenEdges, stList *adjacencyEdges,
        stSortedSet *nodes) {
    /*
     * Extends chosenEdges to a perfect matching: nodes not touched by any chosen edge are
     * paired up in iteration order and the edge for each pair is appended to chosenEdges.
     */
    stHash *nodesToAdjacencyEdges = getNodesToEdgesHash(adjacencyEdges);
    stSortedSet *attachedNodes = getNodeSetOfEdges(chosenEdges);

    stIntTuple *waitingNode = NULL; //Unattached node still looking for a partner
    stIntTuple *node;
    stSortedSetIterator *nodeIt = stSortedSet_getIterator(nodes);
    while ((node = stSortedSet_getNext(nodeIt)) != NULL) {
        if (stSortedSet_search(attachedNodes, node) != NULL) {
            continue; //Already matched by a chosen edge
        }
        if (waitingNode == NULL) {
            waitingNode = node;
            continue;
        }
        stList_append(chosenEdges,
                getEdgeForNodes(stIntTuple_get(waitingNode, 0), stIntTuple_get(node, 0), nodesToAdjacencyEdges));
        waitingNode = NULL;
    }
    stSortedSet_destructIterator(nodeIt);

    assert(waitingNode == NULL); //An odd number of unattached nodes cannot be paired up
    stSortedSet_destruct(attachedNodes);
    assert(stList_length(chosenEdges) * 2 == stSortedSet_size(nodes));
    stHash_destruct(nodesToAdjacencyEdges);
}
Exemple #7
0
Block *flower_getBlock(Flower *flower, Name name) {
    //Build a stack-allocated probe carrying only the name, then search flower->blocks with it.
    BlockContents keyContents;
    keyContents.name = name;
    Block key;
    key.blockContents = &keyContents;
    return stSortedSet_search(flower->blocks, &key);
}
/*
 * Builds the adjacency list for the given pairs and runs the depth first search from
 * position 0 of every sequence. Returns non-zero if the search reports a cycle, 0 otherwise.
 */
static int64_t containsACycle(stList *pairs, int64_t sequenceNumber) {
    stHash *adjacencyList = buildAdjacencyList(pairs, sequenceNumber);

    //Bookkeeping sets for the search; they have no destructor of their own for the tuples.
    stSortedSet *started = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
    stSortedSet *done = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);

    int64_t cyclic = 0;
    for (int64_t seq = 0; seq < sequenceNumber; seq++) {
        //The probe is only used for lookups; the search itself runs on the tuple stored in the column.
        stIntTuple *probe = stIntTuple_construct2(seq, 0);
        stSortedSet *startColumn = stHash_search(adjacencyList, probe);
        assert(startColumn != NULL);
        stIntTuple *storedStart = stSortedSet_search(startColumn, probe);
        assert(storedStart != NULL);
        if (!cyclic && dfs(adjacencyList, storedStart, started, done)) {
            cyclic = 1;
        }
        stIntTuple_destruct(probe);
    }

    //Cleanup: gather the columns into one set so that destructing it destructs each column.
    stSortedSet *columns = stSortedSet_construct2((void (*)(void *))stSortedSet_destruct);
    stHashIterator *keyIt = stHash_getIterator(adjacencyList);
    for (stIntTuple *key = stHash_getNext(keyIt); key != NULL; key = stHash_getNext(keyIt)) {
        stSortedSet *column = stHash_search(adjacencyList, key);
        assert(column != NULL);
        stSortedSet_insert(columns, column);
    }
    stHash_destructIterator(keyIt);
    stHash_destruct(adjacencyList);
    stSortedSet_destruct(columns);
    stSortedSet_destruct(started);
    stSortedSet_destruct(done);

    return cyclic;
}
/*
 * Checks the components induced by filteredEdges: each has between 1 and maxComponentSize
 * members, and every edge that was filtered out joins two distinct components whose
 * combined size would exceed maxComponentSize.
 */
static void checkComponents(CuTest *testCase, stList *filteredEdges) {
    stHash *nodesToComponents = getComponents(filteredEdges);

    //Size bounds on every component.
    stList *components = stHash_getValues(nodesToComponents);
    for (int64_t c = 0; c < stList_length(components); c++) {
        stSortedSet *component = stList_get(components, c);
        CuAssertTrue(testCase, stSortedSet_size(component) <= maxComponentSize);
        CuAssertTrue(testCase, stSortedSet_size(component) >= 1);
    }

    //None of the edges that were dropped could have been kept.
    stSortedSet *keptEdges = stList_getSortedSet(filteredEdges, (int(*)(const void *, const void *)) stIntTuple_cmpFn);
    for (int64_t e = 0; e < stList_length(edges); e++) {
        stIntTuple *edge = stList_get(edges, e);
        if (stSortedSet_search(keptEdges, edge) != NULL) {
            continue; //This edge was kept
        }
        //The two nodes of an edge are read from tuple positions 1 and 2.
        stIntTuple *firstNode = stIntTuple_construct1(stIntTuple_get(edge, 1));
        stIntTuple *secondNode = stIntTuple_construct1(stIntTuple_get(edge, 2));
        stSortedSet *firstComponent = stHash_search(nodesToComponents, firstNode);
        stSortedSet *secondComponent = stHash_search(nodesToComponents, secondNode);
        CuAssertTrue(testCase, firstComponent != NULL && secondComponent != NULL);
        CuAssertTrue(testCase, firstComponent != secondComponent);
        CuAssertTrue(testCase, stSortedSet_size(firstComponent) + stSortedSet_size(secondComponent) > maxComponentSize);
        stIntTuple_destruct(firstNode);
        stIntTuple_destruct(secondNode);
    }
    stSortedSet_destruct(keptEdges);

    //Cleanup: move the components into a set first, then destruct them through that set.
    stSortedSet *distinctComponents = stList_getSortedSet(components, NULL);
    stList_destruct(components);
    stSortedSet_setDestructor(distinctComponents, (void(*)(void *)) stSortedSet_destruct);
    stSortedSet_destruct(distinctComponents);
    stHash_destruct(nodesToComponents);
}
Exemple #10
0
/*
 * Adds to metaSequences the meta sequence of every sequence in this flower, and in all
 * nested flowers, for which stringIsInList(event header, eventStrings) returns 0.
 * NOTE(review): presumably 0 signals that the header is in the list - confirm against
 * stringIsInList.
 */
static void getMetaSequencesForEventsP(stSortedSet *metaSequences,
        Flower *flower, stList *eventStrings) {
    //Sequences directly in this flower.
    Sequence *sequence;
    Flower_SequenceIterator *seqIt = flower_getSequenceIterator(flower);
    while ((sequence = flower_getNextSequence(seqIt)) != NULL) {
        MetaSequence *metaSequence = sequence_getMetaSequence(sequence);
        if (stringIsInList(event_getHeader(sequence_getEvent(sequence)), eventStrings) == 0
                && stSortedSet_search(metaSequences, metaSequence) == NULL) {
            stSortedSet_insert(metaSequences, metaSequence);
        }
    }
    flower_destructSequenceIterator(seqIt);

    //Then the nested flowers.
    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    for (Group *group = flower_getNextGroup(groupIt); group != NULL;
            group = flower_getNextGroup(groupIt)) {
        if (group_getNestedFlower(group) == NULL) {
            continue;
        }
        getMetaSequencesForEventsP(metaSequences,
                group_getNestedFlower(group), eventStrings);
    }
    flower_destructGroupIterator(groupIt);
}
Exemple #11
0
Sequence *flower_getSequence(Flower *flower, Name name) {
    //Build a stack-allocated probe carrying only the name, then search flower->sequences with it.
    MetaSequence keyMetaSequence;
    keyMetaSequence.name = name;
    Sequence key;
    key.metaSequence = &keyMetaSequence;
    return stSortedSet_search(flower->sequences, &key);
}
Exemple #12
0
Cap *flower_getCap(Flower *flower, Name name) {
    //Build a stack-allocated probe carrying only the instance name, then search flower->caps with it.
    CapContents keyContents;
    keyContents.instance = name;
    Cap key;
    key.capContents = &keyContents;
    return stSortedSet_search(flower->caps, &key);
}
Exemple #13
0
End *flower_getEnd(Flower *flower, Name name) {
    //Build a stack-allocated probe with the name and orientation 1, then search flower->ends with it.
    EndContents keyContents;
    keyContents.name = name;
    End key;
    key.orientation = 1;
    key.endContents = &keyContents;
    return stSortedSet_search(flower->ends, &key);
}
Exemple #14
0
/*
 * Tests stSortedSet_getIntersection on empty sets, on an empty and a non-empty set, on two
 * overlapping sets, and checks that sets with different comparators raise an exception.
 */
static void test_stSortedSetIntersection(CuTest* testCase) {
    sonLibSortedSetTestSetup();
    //Check intersection of empty sets is okay..
    stSortedSet *sortedSet3 = stSortedSet_getIntersection(sortedSet, sortedSet2);
    CuAssertTrue(testCase, stSortedSet_size(sortedSet3) == 0);
    stSortedSet_destruct(sortedSet3);

    int32_t i;
    for(i=0; i<size; i++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[i]));
    }

    //Check intersection of empty and non-empty set is empty.
    sortedSet3 = stSortedSet_getIntersection(sortedSet, sortedSet2);
    CuAssertTrue(testCase, stSortedSet_size(sortedSet3) == 0);
    stSortedSet_destruct(sortedSet3);

    //Check intersection of two non-empty, overlapping sets is correct.
    stSortedSet_insert(sortedSet2, stIntTuple_construct(1, 0));
    stSortedSet_insert(sortedSet2, stIntTuple_construct(1, 1));
    stSortedSet_insert(sortedSet2, stIntTuple_construct(1, 5));

    sortedSet3 = stSortedSet_getIntersection(sortedSet, sortedSet2);
    CuAssertTrue(testCase, stSortedSet_size(sortedSet3) == 2);
    stIntTuple *intTuple = stIntTuple_construct(1, 1);
    CuAssertTrue(testCase, stSortedSet_search(sortedSet3, intTuple) != NULL);
    stIntTuple_destruct(intTuple);
    intTuple = stIntTuple_construct(1, 5);
    CuAssertTrue(testCase, stSortedSet_search(sortedSet3, intTuple) != NULL);
    stIntTuple_destruct(intTuple);
    stSortedSet_destruct(sortedSet3);

    //Check we get an exception with sorted sets with different comparators.
    stSortedSet *sortedSet4 = stSortedSet_construct();
    //Records that the catch block ran; without it the test would pass when nothing is thrown.
    volatile int exceptionRaised = 0;
    stTry {
        //Getting past the call means no exception was thrown; free the result so it does not leak.
        stSortedSet *unexpected = stSortedSet_getIntersection(sortedSet, sortedSet4);
        stSortedSet_destruct(unexpected);
    } stCatch(except) {
        exceptionRaised = 1;
        CuAssertTrue(testCase, stExcept_getId(except) == SORTED_SET_EXCEPTION_ID);
    }
    stTryEnd
    stSortedSet_destruct(sortedSet4);
    CuAssertTrue(testCase, exceptionRaised);

    sonLibSortedSetTestTeardown();
}
Exemple #15
0
/*
 * Adds object to the set: if the search finds an existing entry, that entry is replaced,
 * otherwise the object is inserted as a new entry.
 */
void stSortedSet_insert(stSortedSet *sortedSet, void *object) {
    checkModifiable(sortedSet);
    // FIXME: two passes, modify avl code.
    if (stSortedSet_search(sortedSet, object) == NULL) {
        avl_insert(sortedSet->sortedSet, object);
        return;
    }
    avl_replace(sortedSet->sortedSet, object);
}
static stList *getEdgesThatBridgeComponents(stList *components,
        stHash *nodesToNonZeroWeightedAdjacencyEdges) {
    /*
     * Returns a new list of the adjacency edges that have exactly one of their two nodes
     * (tuple positions 0 and 1) inside a component. An edge is appended each time it is
     * met, so the same edge may appear in the list more than once.
     */
    stList *bridgingAdjacencyEdges = stList_construct();

    for (int64_t c = 0; c < stList_length(components); c++) {
        stSortedSet *componentNodes = getNodeSetOfEdges(stList_get(components, c));
        stSortedSetIterator *nodeIt = stSortedSet_getIterator(componentNodes);
        stIntTuple *node;
        while ((node = stSortedSet_getNext(nodeIt)) != NULL) {
            stList *incidentEdges = stHash_search(nodesToNonZeroWeightedAdjacencyEdges, node);
            if (incidentEdges == NULL) {
                continue; //No adjacency edges are recorded for this node
            }
            for (int64_t e = 0; e < stList_length(incidentEdges); e++) {
                stIntTuple *edge = stList_get(incidentEdges, e);
                stIntTuple *firstNode = stIntTuple_construct1(stIntTuple_get(edge, 0));
                stIntTuple *secondNode = stIntTuple_construct1(stIntTuple_get(edge, 1));
                int firstInside = stSortedSet_search(componentNodes, firstNode) != NULL;
                int secondInside = stSortedSet_search(componentNodes, secondNode) != NULL;
                //The edge was found through a node of this component, so one end must be inside.
                assert(firstInside || secondInside);
                if (!firstInside || !secondInside) {
                    stList_append(bridgingAdjacencyEdges, edge);
                }
                stIntTuple_destruct(firstNode);
                stIntTuple_destruct(secondNode);
            }
        }
        stSortedSet_destructIterator(nodeIt);
        stSortedSet_destruct(componentNodes);
    }

    return bridgingAdjacencyEdges;
}
Exemple #17
0
/*
 * Basic test: fills the set from input, checks size, first and last member, then removes
 * the members one at a time in sorted order, checking search and size along the way.
 */
static void test_stSortedSet(CuTest* testCase) {
    sonLibSortedSetTestSetup();
    CuAssertIntEquals(testCase, 0, stSortedSet_size(sortedSet));
    for (int32_t k = 0; k < size; k++) {
        stSortedSet_insert(sortedSet, stIntTuple_construct(1, input[k]));
    }
    CuAssertIntEquals(testCase, sortedSize, stSortedSet_size(sortedSet));
    CuAssertIntEquals(testCase, sortedInput[0], stIntTuple_getPosition(stSortedSet_getFirst(sortedSet), 0));
    CuAssertIntEquals(testCase, sortedInput[sortedSize-1], stIntTuple_getPosition(stSortedSet_getLast(sortedSet), 0));

    for (int32_t removed = 0; removed < sortedSize; removed++) {
        //The set shrinks by one for every member removed so far.
        CuAssertIntEquals(testCase, sortedSize-removed, stSortedSet_size(sortedSet));
        stIntTuple *probe = stIntTuple_construct(1, sortedInput[removed]);
        CuAssertTrue(testCase, stIntTuple_getPosition(stSortedSet_search(sortedSet, probe), 0) == sortedInput[removed]);
        stSortedSet_remove(sortedSet, probe);
        CuAssertTrue(testCase, stSortedSet_search(sortedSet, probe) == NULL);
        stIntTuple_destruct(probe);
    }
    sonLibSortedSetTestTeardown();
}
static void getMaximalHaplotypePathsP(Flower *flower,
        stList *maximalHaplotypePaths, stSortedSet *segmentSet,
        const char *eventString,
        stList *eventStrings) {
    /*
     * For every not yet seen segment of this flower whose event header equals eventString
     * and whose 5' cap end has a cap in eventStrings, starts a new maximal haplotype path
     * and appends it to maximalHaplotypePaths. Nested flowers are processed afterwards.
     */
    Segment *segment;
    Flower_SegmentIterator *segmentIt = flower_getSegmentIterator(flower);
    while ((segment = flower_getNextSegment(segmentIt)) != NULL) {
        if (stSortedSet_search(segmentSet, segment) != NULL
                || stSortedSet_search(segmentSet, segment_getReverse(segment)) != NULL) {
            continue; //Already seen, in one orientation or the other
        }
        if (strcmp(event_getHeader(segment_getEvent(segment)), eventString) != 0) {
            continue; //The segment is not in the assembly
        }
        if (!hasCapInEvents(cap_getEnd(segment_get5Cap(segment)), eventStrings)) {
            //Not a block in a haplotype segment; the 3' side has to agree.
            assert(!hasCapInEvents(cap_getEnd(segment_get3Cap(segment)), eventStrings));
            continue;
        }
        assert(hasCapInEvents(cap_getEnd(segment_get3Cap(segment)), eventStrings));
        stList *path = stList_construct();
        stList_append(maximalHaplotypePaths, path);
        getMaximalHaplotypePathsP2(segment, path, segmentSet, eventStrings);
    }
    flower_destructSegmentIterator(segmentIt);

    //Now recurse on the contained flowers.
    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    for (Group *group = flower_getNextGroup(groupIt); group != NULL;
            group = flower_getNextGroup(groupIt)) {
        if (group_getNestedFlower(group) == NULL) {
            continue;
        }
        getMaximalHaplotypePathsP(group_getNestedFlower(group),
                maximalHaplotypePaths, segmentSet, eventString, eventStrings);
    }
    flower_destructGroupIterator(groupIt);
}
static int64_t intersectionSize(stSortedSet *set, stList *list) {
    /*
     * Counts how many entries of the list are found in the set. Each list position is
     * counted separately.
     */
    int64_t hits = 0;
    int64_t index = 0;
    while (index < stList_length(list)) {
        hits += (stSortedSet_search(set, stList_get(list, index)) != NULL) ? 1 : 0;
        index++;
    }
    return hits;
}
Exemple #20
0
/*
 * Draws 100000 ids from cactusDisk_getUniqueID and checks that each is in range, is not
 * NULL_NAME, survives the name -> string -> name round trip and has not been seen before.
 */
void testCactusDisk_getUniqueID_Unique(CuTest* testCase) {
    cactusDiskTestSetup();
    //String forms of the ids seen so far; free is passed as the set's destructor.
    stSortedSet *seenNames = stSortedSet_construct3(testCactusDisk_getUniqueID_UniqueP, free);
    for (int64_t drawn = 0; drawn != 100000; drawn++) {
        Name uniqueName = cactusDisk_getUniqueID(cactusDisk);
        CuAssertTrue(testCase, uniqueName > 0);
        CuAssertTrue(testCase, uniqueName < INT64_MAX);
        CuAssertTrue(testCase, uniqueName != NULL_NAME);
        char *nameString = cactusMisc_nameToString(uniqueName);
        CuAssertTrue(testCase, stSortedSet_search(seenNames, nameString) == NULL);
        CuAssertTrue(testCase, cactusMisc_stringToName(nameString) == uniqueName);
        stSortedSet_insert(seenNames, nameString);
    }
    stSortedSet_destruct(seenNames);
    cactusDiskTestTeardown();
}
Exemple #21
0
/*
 * Returns a new sorted set holding the members of sortedSet1 that are not found in
 * sortedSet2. The new set uses sortedSet1's compare function and is constructed with a
 * NULL destructor. Throws SORTED_SET_EXCEPTION_ID if the comparators of the two sets are
 * not equal.
 */
stSortedSet *stSortedSet_getDifference(stSortedSet *sortedSet1, stSortedSet *sortedSet2) {
    if(!stSortedSet_comparatorsEqual(sortedSet1, sortedSet2)) {
        stThrowNew(SORTED_SET_EXCEPTION_ID, "Comparators are not equal for creating the sorted set difference");
    }
    stSortedSet *difference = stSortedSet_construct3(stSortedSet_getComparator(sortedSet1)->compareFn, NULL);

    //Keep each member of sortedSet1 that sortedSet2 does not contain.
    stSortedSetIterator *memberIt = stSortedSet_getIterator(sortedSet1);
    for (void *member = stSortedSet_getNext(memberIt); member != NULL;
            member = stSortedSet_getNext(memberIt)) {
        if (stSortedSet_search(sortedSet2, member) != NULL) {
            continue;
        }
        stSortedSet_insert(difference, member);
    }
    stSortedSet_destructIterator(memberIt);

    return difference;
}
Exemple #22
0
/*
 * Requests 10 id intervals of random size from cactusDisk_getUniqueIDInterval and checks
 * every id of every interval: in range, not NULL_NAME, survives the name -> string -> name
 * round trip and has not been seen before.
 */
void testCactusDisk_getUniqueID_UniqueIntervals(CuTest* testCase) {
    cactusDiskTestSetup();
    //String forms of the ids seen so far; free is passed as the set's destructor.
    stSortedSet *seenNames = stSortedSet_construct3(testCactusDisk_getUniqueID_UniqueP, free);
    for (int64_t interval = 0; interval != 10; interval++) {
        int64_t intervalSize = st_randomInt(0, 100000);
        //The returned name is the first of the interval; the rest follow by incrementing it.
        Name uniqueName = cactusDisk_getUniqueIDInterval(cactusDisk, intervalSize);
        for (int64_t offset = 0; offset < intervalSize; offset++, uniqueName++) {
            CuAssertTrue(testCase, uniqueName > 0);
            CuAssertTrue(testCase, uniqueName < INT64_MAX);
            CuAssertTrue(testCase, uniqueName != NULL_NAME);
            char *nameString = cactusMisc_nameToString(uniqueName);
            CuAssertTrue(testCase, stSortedSet_search(seenNames, nameString) == NULL);
            CuAssertTrue(testCase, cactusMisc_stringToName(nameString) == uniqueName);
            stSortedSet_insert(seenNames, nameString);
        }
    }
    stSortedSet_destruct(seenNames);
    cactusDiskTestTeardown();
}
/*
 * Builds the adjacency list structure: a hash from (sequence, position) tuples to the
 * column (sorted set of such tuples) that the position belongs to.
 *
 * Every position of every sequence starts in a column of its own. Each entry of pairs is
 * read as (sequence1, position1, sequence2, position2) and merges the columns of its two
 * positions into one.
 */
static stHash *buildAdjacencyList(stList *pairs, int64_t sequenceNumber) {
    stHash *positionToColumn = stHash_construct3((uint64_t (*)(const void *))stIntTuple_hashKey,
                                     (int (*)(const void *, const void *))stIntTuple_equalsFn,
                                     (void (*)(void *))stIntTuple_destruct, NULL);

    //Start with one singleton column per sequence position.
    for (int64_t seq = 0; seq < sequenceNumber; seq++) {
        for (int64_t pos = 0; pos < MAX_SEQUENCE_SIZE; pos++) {
            stIntTuple *seqPos = stIntTuple_construct2(seq, pos);
            stSortedSet *singleton = stSortedSet_construct3((int (*)(const void *, const void *))stIntTuple_cmpFn, NULL);
            stSortedSet_insert(singleton, seqPos);
            stHash_insert(positionToColumn, seqPos, singleton);
        }
    }

    //Merge the columns joined by each pair.
    stListIterator *pairIt = stList_getIterator(pairs);
    stIntTuple *pair;
    while ((pair = stList_getNext(pairIt)) != NULL) {
        //The two probes are only needed to look up the columns.
        stIntTuple *probe1 = stIntTuple_construct2(stIntTuple_get(pair, 0), stIntTuple_get(pair, 1));
        stIntTuple *probe2 = stIntTuple_construct2(stIntTuple_get(pair, 2), stIntTuple_get(pair, 3));
        stSortedSet *keptColumn = stHash_search(positionToColumn, probe1);
        assert(keptColumn != NULL);
        stSortedSet *absorbedColumn = stHash_search(positionToColumn, probe2);
        assert(absorbedColumn != NULL);
        stIntTuple_destruct(probe1);
        stIntTuple_destruct(probe2);

        if (keptColumn != absorbedColumn) {
            //Move every member of the absorbed column over and repoint its hash entry.
            stSortedSetIterator *memberIt = stSortedSet_getIterator(absorbedColumn);
            stIntTuple *member;
            while ((member = stSortedSet_getNext(memberIt)) != NULL) {
                assert(stSortedSet_search(keptColumn, member) == NULL);
                stSortedSet_insert(keptColumn, member);
                assert(stHash_search(positionToColumn, member) == absorbedColumn);
                stHash_insert(positionToColumn, member, keptColumn);
                assert(stHash_search(positionToColumn, member) == keptColumn);
            }
            stSortedSet_destructIterator(memberIt);
            stSortedSet_destruct(absorbedColumn);
        }
    }
    stList_destructIterator(pairIt);
    return positionToColumn;
}
Exemple #24
0
/*
 * Inserts every segment of this flower, and of all nested flowers, into segments. A
 * segment for which segment_getStrand returns false is replaced by its reverse first.
 */
static void getOrderedSegmentsP(Flower *flower,
        stSortedSet *segments) {
    Flower_SegmentIterator *segmentIt = flower_getSegmentIterator(flower);
    for (Segment *segment = flower_getNextSegment(segmentIt); segment != NULL;
            segment = flower_getNextSegment(segmentIt)) {
        Segment *oriented = segment_getStrand(segment) ? segment : segment_getReverse(segment);
        assert(stSortedSet_search(segments, oriented) == NULL);
        stSortedSet_insert(segments, oriented);
    }
    flower_destructSegmentIterator(segmentIt);

    //Recurse over the nested flowers.
    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    for (Group *group = flower_getNextGroup(groupIt); group != NULL;
            group = flower_getNextGroup(groupIt)) {
        if (group_getNestedFlower(group) == NULL) {
            continue;
        }
        getOrderedSegmentsP(group_getNestedFlower(group), segments);
    }
    flower_destructGroupIterator(groupIt);
}
/*
 * Adds to component every edge reachable from node. The hash entry of a node is removed
 * when the node is first visited, which is what stops the recursion from revisiting it;
 * the node's edge list is destructed once it has been processed.
 */
static void getComponentsP(stHash *nodesToEdges, int64_t node,
        stSortedSet *component) {
    stIntTuple *nodeKey = stIntTuple_construct1(node);
    stList *incidentEdges = stHash_search(nodesToEdges, nodeKey);
    if (incidentEdges == NULL) {
        //No entry for this node (for example because it was already visited).
        stIntTuple_destruct(nodeKey);
        return;
    }
    stHash_remove(nodesToEdges, nodeKey);
    for (int64_t e = 0; e < stList_length(incidentEdges); e++) {
        stIntTuple *edge = stList_get(incidentEdges, e);
        if (stSortedSet_search(component, edge) == NULL) {
            stSortedSet_insert(component, edge);
        }
        /*
         * Recursion on stack could equal the total number of nodes.
         */
        getComponentsP(nodesToEdges, stIntTuple_get(edge, 0), component);
        getComponentsP(nodesToEdges, stIntTuple_get(edge, 1), component);
    }
    stList_destruct(incidentEdges);
    stIntTuple_destruct(nodeKey);
}
Exemple #26
0
/*
 * Aligns the adjacency sequences of the caps of "end" and returns the result as a sorted
 * set of AlignedPair objects (ordered by alignedPair_cmpFn). Each aligned pair is inserted
 * together with its reverse, and the scores are reweighted per sequence as described below.
 *
 * sM, spanningTrees, useProgressiveMerging, gapGamma and
 * pairwiseAlignmentBandingParameters are passed through to makeAlignment;
 * maxSequenceLength is passed to adjacencySequence_construct.
 *
 * The returned set is constructed with alignedPair_destruct as its destructor
 * (presumably so that destructing the set frees the pairs - confirm against stSortedSet).
 */
stSortedSet *makeEndAlignment(StateMachine *sM, End *end, int64_t spanningTrees, int64_t maxSequenceLength,
        bool useProgressiveMerging, float gapGamma,
        PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters) {
    //Make an alignment of the sequences in the ends

    //Get the adjacency sequences to be aligned.
    Cap *cap;
    End_InstanceIterator *it = end_getInstanceIterator(end);
    //sequences and seqFrags are filled in parallel, so index i refers to the same cap in both.
    stList *sequences = stList_construct3(0, (void (*)(void *))adjacencySequence_destruct);
    stList *seqFrags = stList_construct3(0, (void (*)(void *))seqFrag_destruct);
    //Maps the end on the other side of each adjacency to a heap-allocated count of the caps adjacent to it.
    stHash *endInstanceNumbers = stHash_construct2(NULL, free);
    while((cap = end_getNext(it)) != NULL) {
        //Caps for which cap_getSide is true are replaced by their reverse.
        if(cap_getSide(cap)) {
            cap = cap_getReverse(cap);
        }
        AdjacencySequence *adjacencySequence = adjacencySequence_construct(cap, maxSequenceLength);
        stList_append(sequences, adjacencySequence);
        assert(cap_getAdjacency(cap) != NULL);
        End *otherEnd = end_getPositiveOrientation(cap_getEnd(cap_getAdjacency(cap)));
        //The name of the other end is stored with the fragment (read back below as rightEndId).
        stList_append(seqFrags, seqFrag_construct(adjacencySequence->string, 0, end_getName(otherEnd)));
        //Increase count of seqfrags with a given end.
        int64_t *c = stHash_search(endInstanceNumbers, otherEnd);
        if(c == NULL) {
            c = st_calloc(1, sizeof(int64_t));
            assert(*c == 0);
            stHash_insert(endInstanceNumbers, otherEnd, c);
        }
        (*c)++;
    }
    end_destructInstanceIterator(it);

    //Get the alignment.
    //NOTE(review): the constant 100000000 is passed here rather than maxSequenceLength - confirm this is intended.
    MultipleAlignment *mA = makeAlignment(sM, seqFrags, spanningTrees, 100000000, useProgressiveMerging, gapGamma, pairwiseAlignmentBandingParameters);

    //Build an array of weights to reweight pairs in the alignment.
    int64_t *pairwiseAlignmentsPerSequenceNonCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t));
    int64_t *pairwiseAlignmentsPerSequenceCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t));
    //First build array on number of pairwise alignments to each sequence, distinguishing alignments between sequences sharing
    //common ends.
    for(int64_t i=0; i<stList_length(mA->chosenPairwiseAlignments); i++) {
        stIntTuple *pairwiseAlignment = stList_get(mA->chosenPairwiseAlignments, i);
        //Tuple positions 1 and 2 hold the indices of the two sequences that were aligned.
        int64_t seq1 = stIntTuple_get(pairwiseAlignment, 1);
        int64_t seq2 = stIntTuple_get(pairwiseAlignment, 2);
        assert(seq1 != seq2);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seq1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seq2);
        //Two fragments share a "common end" when their rightEndId values are equal.
        int64_t *pairwiseAlignmentsPerSequence = seqFrag1->rightEndId == seqFrag2->rightEndId
                ? pairwiseAlignmentsPerSequenceCommonEnds : pairwiseAlignmentsPerSequenceNonCommonEnds;
        pairwiseAlignmentsPerSequence[seq1]++;
        pairwiseAlignmentsPerSequence[seq2]++;
    }
    //Now calculate score adjustments.
    double *scoreAdjustmentsNonCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double));
    double *scoreAdjustmentsCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double));
    for(int64_t i=0; i<stList_length(seqFrags); i++) {
        SeqFrag *seqFrag = stList_get(seqFrags, i);
        End *otherEnd = flower_getEnd(end_getFlower(end), seqFrag->rightEndId);
        assert(otherEnd != NULL);
        assert(stHash_search(endInstanceNumbers, otherEnd) != NULL);
        //Number of fragments adjacent to the same other end (this one included), and the number adjacent to a different end.
        int64_t commonInstanceNumber = *(int64_t *)stHash_search(endInstanceNumbers, otherEnd);
        int64_t nonCommonInstanceNumber = stList_length(seqFrags) - commonInstanceNumber;

        assert(commonInstanceNumber > 0 && nonCommonInstanceNumber >= 0);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] <= nonCommonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] >= 0);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[i] < commonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[i] >= 0);

        //scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber + commonInstanceNumber - 1)/(pairwiseAlignmentsPerSequenceNonCommonEnds[i] + pairwiseAlignmentsPerSequenceCommonEnds[i]);
        //scoreAdjustmentsCommonEnds[i] = scoreAdjustmentsNonCommonEnds[i];
        //Adjustment = (number of possible partners of this kind) / (number of pairwise alignments of this kind).
        //INT64_MIN marks "no pairwise alignment of this kind"; it is asserted never to be used below.
        if(pairwiseAlignmentsPerSequenceNonCommonEnds[i] > 0) {
            scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber)/pairwiseAlignmentsPerSequenceNonCommonEnds[i];
            assert(scoreAdjustmentsNonCommonEnds[i] >= 1.0);
            assert(scoreAdjustmentsNonCommonEnds[i] <= nonCommonInstanceNumber);
        }
        else {
            scoreAdjustmentsNonCommonEnds[i] = INT64_MIN;
        }
        if(pairwiseAlignmentsPerSequenceCommonEnds[i] > 0) {
            //The sequence itself is excluded from its own set of common-end partners, hence the -1.
            scoreAdjustmentsCommonEnds[i] = ((double)commonInstanceNumber-1)/pairwiseAlignmentsPerSequenceCommonEnds[i];
            assert(scoreAdjustmentsCommonEnds[i] >= 1.0);
            assert(scoreAdjustmentsCommonEnds[i] <= commonInstanceNumber-1);
        }
        else {
            scoreAdjustmentsCommonEnds[i] = INT64_MIN;
        }
    }

	//Convert the alignment pairs to an alignment of the caps..
    stSortedSet *sortedAlignment =
                stSortedSet_construct3((int (*)(const void *, const void *))alignedPair_cmpFn,
                (void (*)(void *))alignedPair_destruct);
    //The aligned pairs are popped off mA->alignedPairs and destructed one by one as they are converted.
    while(stList_length(mA->alignedPairs) > 0) {
        stIntTuple *alignedPair = stList_pop(mA->alignedPairs);
        //Tuple layout as read below: (score, sequence index 1, offset 1, sequence index 2, offset 2).
        assert(stIntTuple_length(alignedPair) == 5);
        int64_t seqIndex1 = stIntTuple_get(alignedPair, 1);
        int64_t seqIndex2 = stIntTuple_get(alignedPair, 3);
        AdjacencySequence *i = stList_get(sequences, seqIndex1);
        AdjacencySequence *j = stList_get(sequences, seqIndex2);
        assert(i != j);
        int64_t offset1 = stIntTuple_get(alignedPair, 2);
        int64_t offset2 = stIntTuple_get(alignedPair, 4);
        int64_t score = stIntTuple_get(alignedPair, 0);
        if(score <= 0) { //Happens when indel probs are included
            score = 1; //This is the minimum
        }
        assert(score > 0 && score <= PAIR_ALIGNMENT_PROB_1);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seqIndex1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seqIndex2);
        assert(seqFrag1 != seqFrag2);
        //Use the adjustment table that matches whether the two fragments share the same other end.
        double *scoreAdjustments = seqFrag1->rightEndId == seqFrag2->rightEndId ? scoreAdjustmentsCommonEnds : scoreAdjustmentsNonCommonEnds;
        assert(scoreAdjustments[seqIndex1] != INT64_MIN);
        assert(scoreAdjustments[seqIndex2] != INT64_MIN);
        //Offsets are added to the start on the positive strand and subtracted from it otherwise.
        AlignedPair *alignedPair2 = alignedPair_construct(
                i->subsequenceIdentifier, i->start + (i->strand ? offset1 : -offset1), i->strand,
                j->subsequenceIdentifier, j->start + (j->strand ? offset2 : -offset2), j->strand,
                score*scoreAdjustments[seqIndex1], score*scoreAdjustments[seqIndex2]); //Do the reweighting here.
        assert(stSortedSet_search(sortedAlignment, alignedPair2) == NULL);
        assert(stSortedSet_search(sortedAlignment, alignedPair2->reverse) == NULL);
        //Both the pair and its reverse go into the result.
        stSortedSet_insert(sortedAlignment, alignedPair2);
        stSortedSet_insert(sortedAlignment, alignedPair2->reverse);
        stIntTuple_destruct(alignedPair);
    }

    //Cleanup
    stList_destruct(seqFrags);
    stList_destruct(sequences);
    free(pairwiseAlignmentsPerSequenceNonCommonEnds);
    free(pairwiseAlignmentsPerSequenceCommonEnds);
    free(scoreAdjustmentsNonCommonEnds);
    free(scoreAdjustmentsCommonEnds);
    multipleAlignment_destruct(mA);
    stHash_destruct(endInstanceNumbers);

    return sortedAlignment;
}
Exemple #27
0
Segment *block_getInstance(Block *block, Name name) {
	//Search the block's segments with a stack-allocated probe that carries only the name.
	Segment key;
	key.name = name;
	Segment *found = stSortedSet_search(block->blockContents->segments, &key);
	return block_getInstanceP(block, found);
}
Exemple #28
0
/*
 * Prints a one-line summary of statistics for a single flower.
 *
 * Expected arguments:
 *   argv[1]  log level string (passed to st_setLogLevelFromString)
 *   argv[2]  key-value database configuration string for the cactus disk
 *   argv[3]  name of the flower, as a string accepted by cactusMisc_stringToName
 *
 * Returns 0 on success, 1 if the arguments are missing or the flower could
 * not be loaded.
 */
int main(int argc, char *argv[]) {
    //All three positional arguments are read unconditionally below, so refuse to
    //run without them rather than indexing past the end of argv.
    if(argc < 4) {
        fprintf(stderr, "Usage: %s <logLevel> <cactusDiskDatabaseString> <flowerName>\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "<program>");
        return 1;
    }

    st_setLogLevelFromString(argv[1]);
    st_logDebug("Set up logging\n");

    stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(argv[2]);
    CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0);
    stKVDatabaseConf_destruct(kvDatabaseConf);
    st_logDebug("Set up the flower disk\n");

    Name flowerName = cactusMisc_stringToName(argv[3]);
    Flower *flower = cactusDisk_getFlower(cactusDisk, flowerName);
    //NOTE(review): assumes cactusDisk_getFlower signals a missing flower with NULL -- confirm.
    if(flower == NULL) {
        fprintf(stderr, "Could not load the flower named %s\n", argv[3]);
        return 1;
    }

    //Totals that can be read straight off the flower.
    int64_t totalBases = flower_getTotalBaseLength(flower);
    int64_t totalEnds = flower_getEndNumber(flower);
    int64_t totalFreeEnds = flower_getFreeStubEndNumber(flower);
    int64_t totalAttachedEnds = flower_getAttachedStubEndNumber(flower);
    int64_t totalCaps = flower_getCapNumber(flower);
    int64_t totalBlocks = flower_getBlockNumber(flower);
    int64_t totalGroups = flower_getGroupNumber(flower);
    int64_t totalChains = flower_getChainNumber(flower);

    //Totals that need a pass over the ends / groups.
    int64_t totalLinkGroups = 0;
    int64_t maxEndDegree = 0;
    int64_t maxAdjacencyLength = 0;
    int64_t totalEdges = 0;

    Flower_EndIterator *endIt = flower_getEndIterator(flower);
    End *end;
    while((end = flower_getNextEnd(endIt)) != NULL) {
        assert(end_getOrientation(end));
        if(end_getInstanceNumber(end) > maxEndDegree) {
            maxEndDegree = end_getInstanceNumber(end);
        }
        //Collect the distinct (positively oriented) ends that this end is adjacent to.
        stSortedSet *ends = stSortedSet_construct();
        End_InstanceIterator *capIt = end_getInstanceIterator(end);
        Cap *cap;
        while((cap = end_getNext(capIt)) != NULL) {
            if(cap_getSequence(cap) != NULL) { //Only caps attached to a sequence are considered
                Cap *adjacentCap = cap_getAdjacency(cap);
                assert(adjacentCap != NULL);
                End *adjacentEnd = end_getPositiveOrientation(cap_getEnd(adjacentCap));
                stSortedSet_insert(ends, adjacentEnd);
                //Absolute coordinate distance between the two adjacent caps.
                int64_t adjacencyLength = cap_getCoordinate(cap) - cap_getCoordinate(adjacentCap);
                if(adjacencyLength < 0) {
                    adjacencyLength *= -1;
                }
                assert(adjacencyLength >= 1);
                if(adjacencyLength >= maxAdjacencyLength) {
                    maxAdjacencyLength = adjacencyLength;
                }
            }
        }
        end_destructInstanceIterator(capIt);
        totalEdges += stSortedSet_size(ends);
        if(stSortedSet_search(ends, end) != NULL) { //This ensures we count self edges twice, so that the division works.
            totalEdges += 1;
        }
        stSortedSet_destruct(ends);
    }
    //Every edge has been counted once from each side, so the sum must be even.
    assert(totalEdges % 2 == 0);
    flower_destructEndIterator(endIt);

    //Count the groups that have a link.
    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    Group *group;
    while((group = flower_getNextGroup(groupIt)) != NULL) {
        if(group_getLink(group) != NULL) {
            totalLinkGroups++;
        }
    }
    flower_destructGroupIterator(groupIt);

    printf("flower name: %" PRIi64 " total bases: %" PRIi64 " total-ends: %" PRIi64 " total-caps: %" PRIi64 " max-end-degree: %" PRIi64 " max-adjacency-length: %" PRIi64 " total-blocks: %" PRIi64 " total-groups: %" PRIi64 " total-edges: %" PRIi64 " total-free-ends: %" PRIi64 " total-attached-ends: %" PRIi64 " total-chains: %" PRIi64 " total-link groups: %" PRIi64 "\n",
            flower_getName(flower), totalBases, totalEnds, totalCaps, maxEndDegree, maxAdjacencyLength, totalBlocks, totalGroups, totalEdges/2, totalFreeEnds, totalAttachedEnds, totalChains, totalLinkGroups);

    //NOTE(review): the cactus disk handle is never released here; the process exits immediately, but confirm nothing needs flushing.
    return 0;
}
/*
 * Finds the event with the given name in the event tree.
 *
 * A stack-allocated Event with only its name field set serves as the
 * search key for the tree's events set. Returns the result of the set
 * search, i.e. NULL when no matching entry is found.
 */
Event *eventTree_getEvent(EventTree *eventTree, Name eventName) {
	Event key;
	key.name = eventName;
	Event *match = stSortedSet_search(eventTree->events, &key);
	return match;
}
Exemple #30
0
/*
 * Removes the given face from the flower's set of faces.
 *
 * The face is expected to already be a member of flower->faces; this is
 * checked with an assert (so only when asserts are compiled in).
 */
void flower_removeFace(Flower *flower, Face *face) {
    //Precondition: the face must currently be registered with this flower.
    assert(NULL != stSortedSet_search(flower->faces, face));

    stSortedSet_remove(flower->faces, face);
}