static enum CapCode getHaplotypeSwitchCode(Cap *cap, stList *eventStrings) {
    Cap *adjacentCap = cap_getAdjacency(getTerminalCap(cap));
    assert(adjacentCap != NULL);
    End *end = cap_getEnd(cap);
    End *adjacentEnd = cap_getEnd(adjacentCap);
    stSortedSet *eventStringsForEnd1 = getEventStrings(end, eventStrings);
    stSortedSet *eventStringsForEnd2 = getEventStrings(adjacentEnd, eventStrings);

    assert(stSortedSet_size(eventStringsForEnd1) > 0);
    assert(stSortedSet_size(eventStringsForEnd2) > 0);

    stSortedSet *intersectionOfEventStrings = stSortedSet_getIntersection(
            eventStringsForEnd1, eventStringsForEnd2);

    enum CapCode code1 = (stSortedSet_size(intersectionOfEventStrings)
            != stSortedSet_size(eventStringsForEnd1) || stSortedSet_size(
                    intersectionOfEventStrings)
            != stSortedSet_size(eventStringsForEnd2)) ? HAP_SWITCH
    : HAP_NOTHING;

    stSortedSet_destruct(eventStringsForEnd1);
    stSortedSet_destruct(eventStringsForEnd2);
    stSortedSet_destruct(intersectionOfEventStrings);

    return code1;
}
static void getMaximalHaplotypePathsP2(Segment *segment,
        stList *maximalHaplotypePath, stSortedSet *segmentSet, stList *eventStrings) {
    /*
     * Iterate all the way to one end of the contig then start the traversal to define the maximal
     * haplotype path.
     */
    Cap *_5Cap = segment_get5Cap(segment);
    assert(hasCapInEvents(cap_getEnd(segment_get3Cap(segment)), eventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get3Cap(segment))));
    if (trueAdjacency(_5Cap, eventStrings)) { //Check that the adjacency is supported by a haplotype path
        Segment *otherSegment = getAdjacentCapsSegment(_5Cap);
        assert(segment != otherSegment);
        assert(segment_getReverse(segment) != otherSegment);
        if (otherSegment != NULL) {
            assert(stSortedSet_search(segmentSet, otherSegment) == NULL);
            assert(stSortedSet_search(segmentSet, segment_getReverse(
                    otherSegment)) == NULL);
            assert(hasCapInEvents(cap_getEnd(segment_get3Cap(otherSegment)), eventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get3Cap(otherSegment))));
            getMaximalHaplotypePathsP2(otherSegment, maximalHaplotypePath,
                    segmentSet, eventStrings);
        } else { //We need to start the maximal haplotype recursion
            getMaximalHaplotypePathsP3(segment, maximalHaplotypePath,
                    segmentSet, eventStrings);
        }
    } else {
        getMaximalHaplotypePathsP3(segment, maximalHaplotypePath, segmentSet, eventStrings);
    }
}
bool trueAdjacency(Cap *cap, stList *eventStrings) {
	if(getTerminalAdjacencyLength(cap) > 0) {
        return 0;
    }
	cap = getTerminalCap(cap);
    assert(cap != NULL);
    Cap *otherCap = cap_getAdjacency(cap);
    assert(otherCap != NULL);
    assert(cap_getAdjacency(otherCap) == cap);
    //So is the adjacency present in one of the haplotypes? That's what we're going to answer..
    End *otherEnd = end_getPositiveOrientation(cap_getEnd(otherCap));
    End_InstanceIterator *endInstanceIt = end_getInstanceIterator(cap_getEnd(cap));
    Cap *cap2;
    while ((cap2 = end_getNext(endInstanceIt)) != NULL) {
        Cap *otherCap2 = cap_getAdjacency(cap2);
        assert(otherCap2 != NULL);
        if (otherEnd == end_getPositiveOrientation(cap_getEnd(otherCap2))) {
            //const char *eventName = event_getHeader(cap_getEvent(cap2));
            assert(event_getHeader(cap_getEvent(cap2)) == event_getHeader(
                            cap_getEvent(otherCap2)));
            if (capHasGivenEvents(cap2, eventStrings)) { //strcmp(eventName, "hapA1") == 0 || strcmp(eventName, "hapA2") == 0) {
                if(getTerminalAdjacencyLength(cap2) == 0) {
                    end_destructInstanceIterator(endInstanceIt);
                    return 1;
                }
            }
        }
    }
    end_destructInstanceIterator(endInstanceIt);
    return 0;
}
static void debugScaffoldPathsP(Cap *cap, stList *haplotypePath,
        stHash *haplotypePathToScaffoldPathHash, stHash *haplotypeToMaximalHaplotypeLengthHash,
        stHash *segmentToMaximalHaplotypePathHash, stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters, bool capDir) {
    int64_t insertLength;
    int64_t deleteLength;
    Cap *otherCap;
    enum CapCode capCode = getCapCode(cap, &otherCap, haplotypeEventStrings, contaminationEventStrings, &insertLength, &deleteLength, capCodeParameters);
    if (capCode == SCAFFOLD_GAP || capCode == AMBIGUITY_GAP) {
        Segment *adjacentSegment = getAdjacentCapsSegment(cap);
        assert(adjacentSegment != NULL);
        while (!hasCapInEvents(cap_getEnd(capDir ? segment_get5Cap(adjacentSegment) : segment_get3Cap(adjacentSegment)), haplotypeEventStrings)) {
            adjacentSegment = getAdjacentCapsSegment(capDir ? segment_get5Cap(adjacentSegment) : segment_get3Cap(adjacentSegment));
            assert(adjacentSegment != NULL);
        }
        assert(adjacentSegment != NULL);
        assert(hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get5Cap(adjacentSegment))));
        stIntTuple *j = stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath);
        (void)j;
        assert(j != NULL);
        stList *adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, adjacentSegment);
        if (adjacentHaplotypePath == NULL) {
            adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash,
                    segment_getReverse(adjacentSegment));
        }
        assert(adjacentHaplotypePath != NULL);
        assert(adjacentHaplotypePath != haplotypePath);
        stIntTuple *k = stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath);
        (void)k;
        assert(k != NULL);
        assert(stIntTuple_get(j, 0) == stIntTuple_get(k, 0));
        assert(stHash_search(haplotypePathToScaffoldPathHash, haplotypePath) ==
                stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath));
    }
}
static int64_t traceThreadLength(Cap *cap, Cap **terminatingCap) {
    /*
     * Gets the length in bases of the thread in the flower, starting from a given attached stub cap.
     * The thread length includes the lengths of adjacencies that it contains.
     *
     * Terminating cap is initialised with the final cap on the thread from cap.
     */
    assert(end_isStubEnd(cap_getEnd(cap)));
    int64_t threadLength = 0;
    while (1) {
        assert(cap_getCoordinate(cap) != INT64_MAX);
        int64_t adjacencyLength = cap_getCoordinate(cap);
        threadLength += adjacencyLength;
        Cap *adjacentCap = cap_getAdjacency(cap);
        assert(adjacentCap != NULL);
        assert(adjacencyLength == cap_getCoordinate(adjacentCap));
        //Traverse any block..
        if (cap_getSegment(adjacentCap) != NULL) {
            threadLength += segment_getLength(cap_getSegment(adjacentCap));
            cap = cap_getOtherSegmentCap(adjacentCap);
            assert(cap != NULL);
        } else {
            assert(end_isStubEnd(cap_getEnd(adjacentCap)));
            *terminatingCap = adjacentCap;
            return threadLength;
        }
    }
    return 1;
}
Exemple #6
0
void testCap_getEnd(CuTest* testCase) {
    cactusCapTestSetup();
    CuAssertTrue(testCase, cap_getEnd(rootCap) == end_getReverse(end));
    CuAssertTrue(testCase, cap_getEnd(cap_getReverse(rootCap)) == end);
    CuAssertTrue(testCase, cap_getEnd(leaf2Cap) == end);
    CuAssertTrue(testCase, cap_getEnd(cap_getReverse(leaf2Cap)) == end_getReverse(end));
    cactusCapTestTeardown();
}
Exemple #7
0
void testCap_getOrientation(CuTest* testCase) {
    cactusCapTestSetup();
    CuAssertTrue(testCase, cap_getOrientation(rootCap) == end_getOrientation(cap_getEnd(rootCap)));
    CuAssertTrue(testCase, cap_getOrientation(leaf1Cap) == end_getOrientation(cap_getEnd(leaf1Cap)));
    CuAssertTrue(testCase, cap_getOrientation(leaf2Cap) == end_getOrientation(cap_getEnd(leaf2Cap)));

    CuAssertTrue(testCase, cap_getOrientation(cap_getReverse(rootCap)) == end_getOrientation(end_getReverse(cap_getEnd(rootCap))));
    CuAssertTrue(testCase, cap_getOrientation(cap_getReverse(leaf1Cap)) == end_getOrientation(end_getReverse(cap_getEnd(leaf1Cap))));
    CuAssertTrue(testCase, cap_getOrientation(cap_getReverse(leaf2Cap)) == end_getOrientation(end_getReverse(cap_getEnd(leaf2Cap))));

    CuAssertTrue(testCase, cap_getOrientation(leaf1Cap) == cap_getOrientation(rootCap));
    CuAssertTrue(testCase, cap_getOrientation(leaf1Cap) != cap_getOrientation(leaf2Cap));

    cactusCapTestTeardown();
}
static void recoverBrokenAdjacencies(Flower *flower, stList *recoveredCaps, Name referenceEventName) {
    /*
     * Find reference intervals that are book-ended by stubs created in a child flower.
     */
    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    Group *group;
    while((group = flower_getNextGroup(groupIt)) != NULL) {
        Flower *nestedFlower;
        if((nestedFlower = group_getNestedFlower(group)) != NULL) {
            Flower_EndIterator *endIt = flower_getEndIterator(nestedFlower);
            End *childEnd;
            while((childEnd = flower_getNextEnd(endIt)) != NULL) {
                if(end_isStubEnd(childEnd) && flower_getEnd(flower, end_getName(childEnd)) == NULL) { //We have a thread we need to promote
                    Cap *childCap = getCapForReferenceEvent(childEnd, referenceEventName); //The cap in the reference
                    assert(childCap != NULL);
                    assert(!end_isAttached(childEnd));
                    childCap = cap_getStrand(childCap) ? childCap : cap_getReverse(childCap);
                    if (!cap_getSide(childCap)) {
                        Cap *adjacentChildCap = NULL;
                        int64_t adjacencyLength = traceThreadLength(childCap, &adjacentChildCap);
                        Cap *cap = copyCapToParent(childCap, recoveredCaps);
                        assert(adjacentChildCap != NULL);
                        assert(!end_isAttached(cap_getEnd(adjacentChildCap)));
                        assert(!cap_getSide(cap));
                        Cap *adjacentCap = copyCapToParent(adjacentChildCap, recoveredCaps);
                        cap_makeAdjacent(cap, adjacentCap);
                        setAdjacencyLength(cap, adjacentCap, adjacencyLength);
                    }
                }
            }
            flower_destructEndIterator(endIt);
        }
    }
    flower_destructGroupIterator(groupIt);
}
bool getCapGetAtEndOfPath(Cap *cap, Cap **pathEndCap,
        int64_t *pathLength, int64_t *nCount, stList *haplotypeEventStrings, stList *contaminationEventStrings) {
    //Account for length of adjacency
    *pathLength += getTerminalAdjacencyLength(cap);
    *nCount += getNumberOfNsInAdjacency(cap);

    Segment *segment = getAdjacentCapsSegment(cap);
    if (segment == NULL) {
        *pathEndCap = cap_getAdjacency(getTerminalCap(cap));
        assert(*pathEndCap != NULL);
        return 0;
    }
    Cap *adjacentCap = cap_getSide(cap) ? segment_get3Cap(segment)
    : segment_get5Cap(segment);
    assert(
            cap_getName(adjacentCap) == cap_getName(
                    cap_getAdjacency(getTerminalCap(cap))));

    End *adjacentEnd = cap_getEnd(adjacentCap);
    if (hasCapInEvents(adjacentEnd, contaminationEventStrings) || hasCapInEvents(adjacentEnd, haplotypeEventStrings)) { //hasCapNotInEvent(adjacentEnd, event_getHeader(cap_getEvent(cap)))) { //isContaminationEnd(adjacentEnd) || isHaplotypeEnd(adjacentEnd)) {
        *pathEndCap = adjacentCap;
        return 1;
    }
    *pathLength += segment_getLength(segment);
    *nCount += getNumberOfNsInSegment(segment);
    return getCapGetAtEndOfPath(cap_getOtherSegmentCap(adjacentCap),
            pathEndCap, pathLength, nCount, haplotypeEventStrings, contaminationEventStrings);
}
static void getMaximalHaplotypePathsCheck(Flower *flower,
        stSortedSet *segmentSet, const char *eventString, stList *eventStrings) {
    /*
     * Do debug checks that the haplotypes paths are well formed.
     */
    Flower_SegmentIterator *segmentIt = flower_getSegmentIterator(flower);
    Segment *segment;
    while ((segment = flower_getNextSegment(segmentIt)) != NULL) {
        if (strcmp(event_getHeader(segment_getEvent(segment)), eventString) == 0) {
            if (hasCapInEvents(cap_getEnd(segment_get5Cap(segment)), eventStrings)) { //isHaplotypeEnd(cap_getEnd(segment_get5Cap(segment)))) {
                assert(stSortedSet_search(segmentSet, segment) != NULL
                        || stSortedSet_search(segmentSet, segment_getReverse(
                                segment)) != NULL);
            }
        }
    }
    flower_destructSegmentIterator(segmentIt);

    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    Group *group;
    while ((group = flower_getNextGroup(groupIt)) != NULL) {
        if (group_getNestedFlower(group) != NULL) {
            getMaximalHaplotypePathsCheck(group_getNestedFlower(group),
                    segmentSet, eventString, eventStrings);
        }
    }
    flower_destructGroupIterator(groupIt);
}
Cap *getTerminalCap(Cap *cap) {
    Flower *nestedFlower = group_getNestedFlower(end_getGroup(cap_getEnd(cap)));
    if (nestedFlower != NULL) {
        Cap *nestedCap = flower_getCap(nestedFlower, cap_getName(cap));
        assert(nestedCap != NULL);
        return getTerminalCap(cap_getOrientation(cap) ? nestedCap : cap_getReverse(nestedCap));
    }
    return cap;
}
stList *getContigPaths(Flower *flower, const char *eventString, stList *eventStrings) {
    stList *maximalHaplotypePaths = stList_construct3(0,
            (void(*)(void *)) stList_destruct);
    stSortedSet *segmentSet = stSortedSet_construct();
    getMaximalHaplotypePathsP(flower, maximalHaplotypePaths, segmentSet, eventString, eventStrings);

    //Do some debug checks..
    st_logDebug("We have %" PRIi64 " maximal haplotype paths\n", stList_length(
            maximalHaplotypePaths));
    getMaximalHaplotypePathsCheck(flower, segmentSet, eventString, eventStrings);
    for (int64_t i = 0; i < stList_length(maximalHaplotypePaths); i++) {
        stList *maximalHaplotypePath = stList_get(maximalHaplotypePaths, i);
        st_logDebug("We have a maximal haplotype path with length %" PRIi64 "\n",
                stList_length(maximalHaplotypePath));
        assert(stList_length(maximalHaplotypePath) > 0);
        Segment *_5Segment = stList_get(maximalHaplotypePath, 0);
        Segment *_3Segment = stList_get(maximalHaplotypePath, stList_length(
                maximalHaplotypePath) - 1);
        if (getAdjacentCapsSegment(segment_get5Cap(_5Segment)) != NULL) {
            assert(!trueAdjacency(segment_get5Cap(_5Segment), eventStrings));
        }
        if (getAdjacentCapsSegment(segment_get3Cap(_3Segment)) != NULL) {
            assert(!trueAdjacency(segment_get3Cap(_3Segment), eventStrings));
        }
        for (int64_t j = 0; j < stList_length(maximalHaplotypePath) - 1; j++) {
            _5Segment = stList_get(maximalHaplotypePath, j);
            _3Segment = stList_get(maximalHaplotypePath, j + 1);
            assert(trueAdjacency(segment_get3Cap(_5Segment), eventStrings));
            assert(trueAdjacency(segment_get5Cap(_3Segment), eventStrings));
            assert(cap_getAdjacency(getTerminalCap(segment_get3Cap(_5Segment)))
                    == getTerminalCap(segment_get5Cap(_3Segment)));
            assert(strcmp(event_getHeader(segment_getEvent(_5Segment)),
                   eventString) == 0);
            assert(strcmp(event_getHeader(segment_getEvent(_3Segment)),
                    eventString) == 0);
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(_5Segment)), eventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get5Cap(_5Segment))));
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(_3Segment)), eventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get5Cap(_3Segment))));
        }
    }

    stSortedSet_destruct(segmentSet);

    return maximalHaplotypePaths;
}
static void getMaximalHaplotypePathsP(Flower *flower,
        stList *maximalHaplotypePaths, stSortedSet *segmentSet,
        const char *eventString,
        stList *eventStrings) {
    /*
     *  Iterate through the segments in this flower.
     */
    Flower_SegmentIterator *segmentIt = flower_getSegmentIterator(flower);
    Segment *segment;
    while ((segment = flower_getNextSegment(segmentIt)) != NULL) {
        if (stSortedSet_search(segmentSet, segment) == NULL
                && stSortedSet_search(segmentSet, segment_getReverse(segment))
                        == NULL) { //Check we haven't yet seen this segment
            if (strcmp(event_getHeader(segment_getEvent(segment)), eventString)
                    == 0) { //Check if the segment is in the assembly
                if (hasCapInEvents(cap_getEnd(segment_get5Cap(segment)), eventStrings)) { //Is a block in a haplotype segment
                    assert(hasCapInEvents(cap_getEnd(segment_get3Cap(segment)), eventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get3Cap(segment))));
                    stList *maximalHaplotypePath = stList_construct();
                    stList_append(maximalHaplotypePaths, maximalHaplotypePath);
                    getMaximalHaplotypePathsP2(segment, maximalHaplotypePath,
                            segmentSet, eventStrings);
                } else {
                    assert(!hasCapInEvents(cap_getEnd(segment_get3Cap(segment)), eventStrings));//assert(!isHaplotypeEnd(cap_getEnd(segment_get3Cap(segment))));
                }
            }
        }
    }
    flower_destructSegmentIterator(segmentIt);
    /*
     * Now recurse on the contained flowers.
     */
    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    Group *group;
    while ((group = flower_getNextGroup(groupIt)) != NULL) {
        if (group_getNestedFlower(group) != NULL) {
            getMaximalHaplotypePathsP(group_getNestedFlower(group),
                    maximalHaplotypePaths, segmentSet, eventString, eventStrings);
        }
    }
    flower_destructGroupIterator(groupIt);
}
void bottomUp(stList *flowers, stKVDatabase *sequenceDatabase, Name referenceEventName,
              bool isTop, stMatrix *(*generateSubstitutionMatrix)(double)) {
    /*
     * A reference thread between the two caps
     * in each flower f may be broken into two in the children of f.
     * Therefore, for each flower f first identify attached stub ends present in the children of f that are
     * not present in f and copy them into f, reattaching the reference caps as needed.
     */
    stList *caps = getCaps(flowers, referenceEventName);
    for (int64_t i = stList_length(caps) - 1; i >= 0; i--) { //Start from end, as we add to this list.
        setAdjacencyLengthsAndRecoverNewCapsAndBrokenAdjacencies(stList_get(caps, i), caps);
    }
    for(int64_t i=0; i<stList_length(flowers); i++) {
        recoverBrokenAdjacencies(stList_get(flowers, i), caps, referenceEventName);
    }

    //Build the phylogenetic event trees for base calling.
    segmentWriteFn_flowerToPhylogeneticTreeHash = stHash_construct2(NULL, (void (*)(void *))cleanupPhylogeneticTree);
    for(int64_t i=0; i<stList_length(flowers); i++) {
        Flower *flower = stList_get(flowers, i);
        Event *refEvent = eventTree_getEvent(flower_getEventTree(flower), referenceEventName);
        assert(refEvent != NULL);
        stHash_insert(segmentWriteFn_flowerToPhylogeneticTreeHash, flower, getPhylogeneticTreeRootedAtGivenEvent(refEvent, generateSubstitutionMatrix));
    }

    if (isTop) {
        stList *threadStrings = buildRecursiveThreadsInList(sequenceDatabase, caps, segmentWriteFn,
                terminalAdjacencyWriteFn);
        assert(stList_length(threadStrings) == stList_length(caps));

        int64_t nonTrivialSeqIndex = 0, trivialSeqIndex = stList_length(threadStrings); //These are used as indices for the names of trivial and non-trivial sequences.
        for (int64_t i = 0; i < stList_length(threadStrings); i++) {
            Cap *cap = stList_get(caps, i);
            assert(cap_getStrand(cap));
            assert(!cap_getSide(cap));
            Flower *flower = end_getFlower(cap_getEnd(cap));
            char *threadString = stList_get(threadStrings, i);
            bool trivialString = isTrivialString(&threadString); //This alters the original string
            MetaSequence *metaSequence = addMetaSequence(flower, cap, trivialString ? trivialSeqIndex++ : nonTrivialSeqIndex++,
                    threadString, trivialString);
            free(threadString);
            int64_t endCoordinate = setCoordinates(flower, metaSequence, cap, metaSequence_getStart(metaSequence) - 1);
            (void) endCoordinate;
            assert(endCoordinate == metaSequence_getLength(metaSequence) + metaSequence_getStart(metaSequence));
        }
        stList_setDestructor(threadStrings, NULL); //The strings are already cleaned up by the above loop
        stList_destruct(threadStrings);
    } else {
        buildRecursiveThreads(sequenceDatabase, caps, segmentWriteFn, terminalAdjacencyWriteFn);
    }
    stHash_destruct(segmentWriteFn_flowerToPhylogeneticTreeHash);
    stList_destruct(caps);
}
static void setAdjacencyLengthsAndRecoverNewCapsAndBrokenAdjacencies(Cap *cap, stList *recoveredCaps) {
    /*
     * Sets the coordinates of the caps to be equal to the length of the adjacency sequence between them.
     * Used to build the reference sequence bottom up.
     *
     * One complexity is that a reference thread between the two caps
     * in each flower f may be broken into two in the children of f.
     * Therefore, for each flower f first identify attached stub ends present in the children of f that are
     * not present in f and copy them into f, reattaching the reference caps as needed.
     */
    while (1) {
        Cap *adjacentCap = cap_getAdjacency(cap);
        assert(adjacentCap != NULL);
        assert(cap_getCoordinate(cap) == INT64_MAX);
        assert(cap_getCoordinate(adjacentCap) == INT64_MAX);
        assert(cap_getStrand(cap) == cap_getStrand(adjacentCap));
        assert(cap_getSide(cap) != cap_getSide(adjacentCap));
        Group *group = end_getGroup(cap_getEnd(cap));
        assert(group != NULL);
        if (!group_isLeaf(group)) { //Adjacency is not terminal, so establish its sequence.
            Flower *nestedFlower = group_getNestedFlower(group);
            Cap *nestedCap = flower_getCap(nestedFlower, cap_getName(cap));
            assert(nestedCap != NULL);
            Cap *nestedAdjacentCap = flower_getCap(nestedFlower, cap_getName(adjacentCap));
            assert(nestedAdjacentCap != NULL);
            Cap *breakerCap;
            int64_t adjacencyLength = traceThreadLength(nestedCap, &breakerCap);
            assert(cap_getOrientation(nestedAdjacentCap));
            if (cap_getPositiveOrientation(breakerCap) != nestedAdjacentCap) { //The thread is broken at the lower level.
                //Copy cap into higher level graph.
                breakerCap = copyCapToParent(breakerCap, recoveredCaps);
                assert(cap_getSide(breakerCap));
                cap_makeAdjacent(cap, breakerCap);
                setAdjacencyLength(cap, breakerCap, adjacencyLength);
                adjacencyLength = traceThreadLength(nestedAdjacentCap, &breakerCap);
                assert(cap_getPositiveOrientation(breakerCap) != cap);
                breakerCap = copyCapToParent(breakerCap, recoveredCaps);
                assert(!cap_getSide(breakerCap));
                cap_makeAdjacent(breakerCap, adjacentCap);
                setAdjacencyLength(adjacentCap, breakerCap, adjacencyLength);
            } else { //The thread is not broken at the lower level
                setAdjacencyLength(cap, adjacentCap, adjacencyLength);
            }
        } else {
            //Set the coordinates of the caps to the adjacency size
            setAdjacencyLength(cap, adjacentCap, 0);
        }
        if ((cap = cap_getOtherSegmentCap(adjacentCap)) == NULL) {
            break;
        }
    }
}
Segment *getCapsSegment(Cap *cap) {
    if (cap_getSegment(cap) != NULL) {
        return cap_getSegment(cap);
    }
    assert(!end_isBlockEnd(cap_getEnd(cap)));
    assert(end_isStubEnd(cap_getEnd(cap)));
    //Walk up to get the next adjacency.
    Group *parentGroup = flower_getParentGroup(end_getFlower(cap_getEnd(cap)));
    if (parentGroup != NULL) {
        Cap *parentCap = flower_getCap(group_getFlower(parentGroup), cap_getName(cap));
        if (parentCap != NULL) {
            assert(cap_getOrientation(parentCap));
            if (!cap_getOrientation(cap)) {
                parentCap = cap_getReverse(parentCap);
            }
            return getCapsSegment(parentCap);
        } else { //Cap must be a free stub end.
            assert(0); //Not in the current alignments.
            assert(end_isFree(cap_getEnd(cap)));
        }
    }
    return NULL;
}
void mapBlockToExon(Cap *cap, int level, FILE *fileHandle){
   fprintf(fileHandle, "\t\t\t<block>\n");
   Block *block = end_getBlock(cap_getEnd(cap));
   Chain *chain = block_getChain(block);
   int start = cap_getCoordinate(cap);
   int end = cap_getCoordinate(cap_getOtherSegmentCap(cap)) +1;
   fprintf(fileHandle, "\t\t\t\t<blockName>%s</blockName>\n", cactusMisc_nameToString(block_getName(block)));
   if(chain != NULL){
      fprintf(fileHandle, "\t\t\t\t<chainName>%s</chainName>\n", cactusMisc_nameToString(chain_getName(chain)));
   }else{
      fprintf(fileHandle, "\t\t\t\t<chainName>NA</chainName>\n");
   }
   fprintf(fileHandle, "\t\t\t\t<level>%d</level>\n", level);
   fprintf(fileHandle, "\t\t\t\t<start>%d</start>\n", start);
   fprintf(fileHandle, "\t\t\t\t<end>%d</end>\n", end);
   fprintf(fileHandle, "\t\t\t</block>\n");
   st_logInfo("mapBlockToExon: start: %d, end: %d\n", start, end);
}
static Cap *copyCapToParent(Cap *cap, stList *recoveredCaps) {
    /*
     * Get the adjacent stub end by looking at the reference adjacency in the parent.
     */
    End *end = cap_getEnd(cap);
    assert(end != NULL);
    Group *parentGroup = flower_getParentGroup(end_getFlower(end));
    assert(parentGroup != NULL);
    End *copiedEnd = end_copyConstruct(end, group_getFlower(parentGroup));
    end_setGroup(copiedEnd, parentGroup); //Set group
    Cap *copiedCap = end_getInstance(copiedEnd, cap_getName(cap));
    assert(copiedCap != NULL);
    copiedCap = cap_getStrand(copiedCap) ? copiedCap : cap_getReverse(copiedCap);
    if (!cap_getSide(copiedCap)) {
        stList_append(recoveredCaps, copiedCap);
    }
    return copiedCap;
}
Exemple #19
0
int64_t flower_getTotalBaseLength(Flower *flower) {
    /*
     * The implementation of this function is very like that in group_getTotalBaseLength, with a few differences. Consider merging them.
     */
    Flower_EndIterator *endIterator = flower_getEndIterator(flower);
    End *end;
    int64_t totalLength = 0;
    while ((end = flower_getNextEnd(endIterator)) != NULL) {
        if (!end_isBlockEnd(end)) {
            End_InstanceIterator *instanceIterator = end_getInstanceIterator(end);
            Cap *cap;
            while ((cap = end_getNext(instanceIterator)) != NULL) {
                cap = cap_getStrand(cap) ? cap : cap_getReverse(cap);
                if (!cap_getSide(cap) && cap_getSequence(cap) != NULL) {
                    Cap *cap2 = cap_getAdjacency(cap);
                    assert(cap2 != NULL);
                    while (end_isBlockEnd(cap_getEnd(cap2))) {
                        Segment *segment = cap_getSegment(cap2);
                        assert(segment != NULL);
                        assert(segment_get5Cap(segment) == cap2);
                        cap2 = cap_getAdjacency(segment_get3Cap(segment));
                        assert(cap2 != NULL);
                        assert(cap_getStrand(cap2));
                        assert(cap_getSide(cap2));
                    }
                    assert(cap_getStrand(cap2));
                    assert(cap_getSide(cap2));
                    int64_t length = cap_getCoordinate(cap2) - cap_getCoordinate(cap) - 1;
                    assert(length >= 0);
                    totalLength += length;
                }
            }
            end_destructInstanceIterator(instanceIterator);
        }
    }
    flower_destructEndIterator(endIterator);
    return totalLength;
}
Exemple #20
0
stSortedSet *makeEndAlignment(StateMachine *sM, End *end, int64_t spanningTrees, int64_t maxSequenceLength,
        bool useProgressiveMerging, float gapGamma,
        PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters) {
    //Make an alignment of the sequences in the ends

    //Get the adjacency sequences to be aligned.
    Cap *cap;
    End_InstanceIterator *it = end_getInstanceIterator(end);
    stList *sequences = stList_construct3(0, (void (*)(void *))adjacencySequence_destruct);
    stList *seqFrags = stList_construct3(0, (void (*)(void *))seqFrag_destruct);
    stHash *endInstanceNumbers = stHash_construct2(NULL, free);
    while((cap = end_getNext(it)) != NULL) {
        if(cap_getSide(cap)) {
            cap = cap_getReverse(cap);
        }
        AdjacencySequence *adjacencySequence = adjacencySequence_construct(cap, maxSequenceLength);
        stList_append(sequences, adjacencySequence);
        assert(cap_getAdjacency(cap) != NULL);
        End *otherEnd = end_getPositiveOrientation(cap_getEnd(cap_getAdjacency(cap)));
        stList_append(seqFrags, seqFrag_construct(adjacencySequence->string, 0, end_getName(otherEnd)));
        //Increase count of seqfrags with a given end.
        int64_t *c = stHash_search(endInstanceNumbers, otherEnd);
        if(c == NULL) {
            c = st_calloc(1, sizeof(int64_t));
            assert(*c == 0);
            stHash_insert(endInstanceNumbers, otherEnd, c);
        }
        (*c)++;
    }
    end_destructInstanceIterator(it);

    //Get the alignment.
    MultipleAlignment *mA = makeAlignment(sM, seqFrags, spanningTrees, 100000000, useProgressiveMerging, gapGamma, pairwiseAlignmentBandingParameters);

    //Build an array of weights to reweight pairs in the alignment.
    int64_t *pairwiseAlignmentsPerSequenceNonCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t));
    int64_t *pairwiseAlignmentsPerSequenceCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t));
    //First build array on number of pairwise alignments to each sequence, distinguishing alignments between sequences sharing
    //common ends.
    for(int64_t i=0; i<stList_length(mA->chosenPairwiseAlignments); i++) {
        stIntTuple *pairwiseAlignment = stList_get(mA->chosenPairwiseAlignments, i);
        int64_t seq1 = stIntTuple_get(pairwiseAlignment, 1);
        int64_t seq2 = stIntTuple_get(pairwiseAlignment, 2);
        assert(seq1 != seq2);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seq1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seq2);
        int64_t *pairwiseAlignmentsPerSequence = seqFrag1->rightEndId == seqFrag2->rightEndId
                ? pairwiseAlignmentsPerSequenceCommonEnds : pairwiseAlignmentsPerSequenceNonCommonEnds;
        pairwiseAlignmentsPerSequence[seq1]++;
        pairwiseAlignmentsPerSequence[seq2]++;
    }
    //Now calculate score adjustments.
    double *scoreAdjustmentsNonCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double));
    double *scoreAdjustmentsCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double));
    for(int64_t i=0; i<stList_length(seqFrags); i++) {
        SeqFrag *seqFrag = stList_get(seqFrags, i);
        End *otherEnd = flower_getEnd(end_getFlower(end), seqFrag->rightEndId);
        assert(otherEnd != NULL);
        assert(stHash_search(endInstanceNumbers, otherEnd) != NULL);
        int64_t commonInstanceNumber = *(int64_t *)stHash_search(endInstanceNumbers, otherEnd);
        int64_t nonCommonInstanceNumber = stList_length(seqFrags) - commonInstanceNumber;

        assert(commonInstanceNumber > 0 && nonCommonInstanceNumber >= 0);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] <= nonCommonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] >= 0);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[i] < commonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[i] >= 0);

        //scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber + commonInstanceNumber - 1)/(pairwiseAlignmentsPerSequenceNonCommonEnds[i] + pairwiseAlignmentsPerSequenceCommonEnds[i]);
        //scoreAdjustmentsCommonEnds[i] = scoreAdjustmentsNonCommonEnds[i];
        if(pairwiseAlignmentsPerSequenceNonCommonEnds[i] > 0) {
            scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber)/pairwiseAlignmentsPerSequenceNonCommonEnds[i];
            assert(scoreAdjustmentsNonCommonEnds[i] >= 1.0);
            assert(scoreAdjustmentsNonCommonEnds[i] <= nonCommonInstanceNumber);
        }
        else {
            scoreAdjustmentsNonCommonEnds[i] = INT64_MIN;
        }
        if(pairwiseAlignmentsPerSequenceCommonEnds[i] > 0) {
            scoreAdjustmentsCommonEnds[i] = ((double)commonInstanceNumber-1)/pairwiseAlignmentsPerSequenceCommonEnds[i];
            assert(scoreAdjustmentsCommonEnds[i] >= 1.0);
            assert(scoreAdjustmentsCommonEnds[i] <= commonInstanceNumber-1);
        }
        else {
            scoreAdjustmentsCommonEnds[i] = INT64_MIN;
        }
    }

	//Convert the alignment pairs to an alignment of the caps..
    stSortedSet *sortedAlignment =
                stSortedSet_construct3((int (*)(const void *, const void *))alignedPair_cmpFn,
                (void (*)(void *))alignedPair_destruct);
    while(stList_length(mA->alignedPairs) > 0) {
        stIntTuple *alignedPair = stList_pop(mA->alignedPairs);
        assert(stIntTuple_length(alignedPair) == 5);
        int64_t seqIndex1 = stIntTuple_get(alignedPair, 1);
        int64_t seqIndex2 = stIntTuple_get(alignedPair, 3);
        AdjacencySequence *i = stList_get(sequences, seqIndex1);
        AdjacencySequence *j = stList_get(sequences, seqIndex2);
        assert(i != j);
        int64_t offset1 = stIntTuple_get(alignedPair, 2);
        int64_t offset2 = stIntTuple_get(alignedPair, 4);
        int64_t score = stIntTuple_get(alignedPair, 0);
        if(score <= 0) { //Happens when indel probs are included
            score = 1; //This is the minimum
        }
        assert(score > 0 && score <= PAIR_ALIGNMENT_PROB_1);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seqIndex1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seqIndex2);
        assert(seqFrag1 != seqFrag2);
        double *scoreAdjustments = seqFrag1->rightEndId == seqFrag2->rightEndId ? scoreAdjustmentsCommonEnds : scoreAdjustmentsNonCommonEnds;
        assert(scoreAdjustments[seqIndex1] != INT64_MIN);
        assert(scoreAdjustments[seqIndex2] != INT64_MIN);
        AlignedPair *alignedPair2 = alignedPair_construct(
                i->subsequenceIdentifier, i->start + (i->strand ? offset1 : -offset1), i->strand,
                j->subsequenceIdentifier, j->start + (j->strand ? offset2 : -offset2), j->strand,
                score*scoreAdjustments[seqIndex1], score*scoreAdjustments[seqIndex2]); //Do the reweighting here.
        assert(stSortedSet_search(sortedAlignment, alignedPair2) == NULL);
        assert(stSortedSet_search(sortedAlignment, alignedPair2->reverse) == NULL);
        stSortedSet_insert(sortedAlignment, alignedPair2);
        stSortedSet_insert(sortedAlignment, alignedPair2->reverse);
        stIntTuple_destruct(alignedPair);
    }

    //Cleanup
    stList_destruct(seqFrags);
    stList_destruct(sequences);
    free(pairwiseAlignmentsPerSequenceNonCommonEnds);
    free(pairwiseAlignmentsPerSequenceCommonEnds);
    free(scoreAdjustmentsNonCommonEnds);
    free(scoreAdjustmentsCommonEnds);
    multipleAlignment_destruct(mA);
    stHash_destruct(endInstanceNumbers);

    return sortedAlignment;
}
static stHash *getScaffoldPathsP(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash,
        stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) {
    stHash *haplotypeToMaximalHaplotypeLengthHash = buildContigPathToContigPathLengthHash(haplotypePaths);
    stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths);
    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stSortedSet *bucket = stSortedSet_construct();
        stHash_insert(haplotypePathToScaffoldPathHash, stList_get(haplotypePaths, i), bucket);
        stSortedSet_insert(bucket, stList_get(haplotypePaths, i));
    }
    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stList *haplotypePath = stList_get(haplotypePaths, i);
        assert(stList_length(haplotypePath) > 0);
        Segment *_5Segment = stList_get(haplotypePath, 0);
        if (!segment_getStrand(_5Segment)) {
            _5Segment = segment_getReverse(stList_get(haplotypePath, stList_length(haplotypePath) - 1));
        }
        assert(segment_getStrand(_5Segment));
        if (getAdjacentCapsSegment(segment_get5Cap(_5Segment)) != NULL) {
            assert(!trueAdjacency(segment_get5Cap(_5Segment), haplotypeEventStrings));
        }
        int64_t insertLength;
        int64_t deleteLength;
        Cap *otherCap;
        enum CapCode _5CapCode = getCapCode(segment_get5Cap(_5Segment), &otherCap, haplotypeEventStrings, contaminationEventStrings, &insertLength, &deleteLength, capCodeParameters);
        if (_5CapCode == SCAFFOLD_GAP || _5CapCode == AMBIGUITY_GAP) {
            assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath) != NULL);
            int64_t j = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath), 0);
            Segment *adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(_5Segment));
            assert(adjacentSegment != NULL);
            while (!hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)) { //is not a haplotype end
                adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(adjacentSegment));
                assert(adjacentSegment != NULL);
            }
            assert(adjacentSegment != NULL);
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)); //is a haplotype end
            stList *adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, adjacentSegment);
            if (adjacentHaplotypePath == NULL) {
                adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, segment_getReverse(
                        adjacentSegment));
            }
            assert(adjacentHaplotypePath != NULL);
            assert(adjacentHaplotypePath != haplotypePath);
            assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath) != NULL);
            int64_t k = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath), 0);

            //Now merge the buckets and make new int tuples..
            stSortedSet *bucket1 = stHash_search(haplotypePathToScaffoldPathHash, haplotypePath);
            stSortedSet *bucket2 = stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath);
            assert(bucket1 != NULL);
            assert(bucket2 != NULL);
            assert(bucket1 != bucket2);
            stSortedSet *bucket3 = stSortedSet_getUnion(bucket1, bucket2);
            stSortedSetIterator *bucketIt = stSortedSet_getIterator(bucket3);
            stList *l;
            while ((l = stSortedSet_getNext(bucketIt)) != NULL) {
                //Do the bucket first
                assert(stHash_search(haplotypePathToScaffoldPathHash, l) == bucket1 || stHash_search(haplotypePathToScaffoldPathHash, l) == bucket2);
                stHash_remove(haplotypePathToScaffoldPathHash, l);
                stHash_insert(haplotypePathToScaffoldPathHash, l, bucket3);
                //Now the length
                stIntTuple *m = stHash_remove(haplotypeToMaximalHaplotypeLengthHash, l);
                assert(m != NULL);
                assert(stIntTuple_get(m, 0) == j || stIntTuple_get(m, 0) == k);
                stHash_insert(haplotypeToMaximalHaplotypeLengthHash, l, stIntTuple_construct1( j + k));
                stIntTuple_destruct(m);
            }
            assert(stHash_search(haplotypePathToScaffoldPathHash, haplotypePath) == bucket3);
            assert(stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath) == bucket3);
            stSortedSet_destructIterator(bucketIt);
        }
    }
    stHash_destruct(segmentToMaximalHaplotypePathHash);
    return haplotypeToMaximalHaplotypeLengthHash;
}
enum CapCode getCapCode(Cap *cap, Cap **otherCap, stList *haplotypeEventStrings, stList *contaminationEventStrings, int64_t *insertLength,
        int64_t *deleteLength, CapCodeParameters *capCodeParameters) {
    assert(hasCapInEvents(cap_getEnd(cap), haplotypeEventStrings));
    if (trueAdjacency(cap, haplotypeEventStrings)) {
        return getHaplotypeSwitchCode(cap, haplotypeEventStrings);
    }
    *insertLength = 0;
    *deleteLength = 0;

    End *end = cap_getEnd(cap);
    Cap *pathEndCap = NULL;
    int64_t pathLength = 0, nCount = 0;
    bool pathEndsOnStub = !getCapGetAtEndOfPath(cap, &pathEndCap, &pathLength,
            &nCount, haplotypeEventStrings, contaminationEventStrings);
    *otherCap = pathEndCap;
    assert(pathLength >= 0);
    assert(nCount >= 0);
    assert(pathEndCap != NULL);
    End *otherPathEnd = cap_getEnd(pathEndCap);
    nCount += getBoundingNs(cap) + getBoundingNs(pathEndCap);

    if (pathEndsOnStub) {
        assert(!hasCapInEvents(otherPathEnd, contaminationEventStrings)); //Can not test for hap event strings, as a stub end may contain the reference.
        //assert(!hasCapInEvents(otherPathEnd, haplotypeEventStrings)); //isHaplotypeEnd(otherPathEnd) && !isContaminationEnd(otherPathEnd));
        return pathLength == 0 ? CONTIG_END
        : (nCount >= 1 ? (nCount >= capCodeParameters->minimumNCount ? CONTIG_END_WITH_SCAFFOLD_GAP
                        : CONTIG_END_WITH_AMBIGUITY_GAP)
                : ERROR_CONTIG_END_WITH_INSERT);
    }

    if (hasCapInEvents(otherPathEnd, haplotypeEventStrings)) {
        if (endsAreConnected(end, otherPathEnd, haplotypeEventStrings)) {
            int64_t minimumHaplotypeDistanceBetweenEnds;
            //Establish if indel or order breaking rearrangement
            if (endsAreAdjacent(end, otherPathEnd,
                            &minimumHaplotypeDistanceBetweenEnds, haplotypeEventStrings)) {
                *insertLength = pathLength;
                *deleteLength = minimumHaplotypeDistanceBetweenEnds;
                if (nCount >= capCodeParameters->minimumNCount) { //Insertion was scaffold gap
                    return SCAFFOLD_GAP;
                }
                if (nCount >= 1) {
                    return AMBIGUITY_GAP;
                }
                if (pathLength > 0) { //Had insertion
                    if (minimumHaplotypeDistanceBetweenEnds > 0) { //Had deletion also
                        return (minimumHaplotypeDistanceBetweenEnds
                                >= capCodeParameters->maxDeletionLength
                                || pathLength
                                >= capCodeParameters->maxInsertionLength) ? ERROR_HAP_TO_HAP_SAME_CHROMOSOME
                        : ERROR_HAP_TO_INSERT_AND_DELETION;
                    }
                    return pathLength >= capCodeParameters->maxInsertionLength ? ERROR_HAP_TO_HAP_SAME_CHROMOSOME
                    : ERROR_HAP_TO_INSERT;
                } else { //Deletion, possibly with Ns
                    assert(minimumHaplotypeDistanceBetweenEnds > 0);
                    return minimumHaplotypeDistanceBetweenEnds
                    >= capCodeParameters->maxDeletionLength ? ERROR_HAP_TO_HAP_SAME_CHROMOSOME
                    : ERROR_HAP_TO_DELETION;
                }
            } else {
                return ERROR_HAP_TO_HAP_SAME_CHROMOSOME;
            }
        } else {
            return ERROR_HAP_TO_HAP_DIFFERENT_CHROMOSOMES;
        }
    } else {
        return pathLength == 0 ? ERROR_HAP_TO_CONTAMINATION
        : ERROR_HAP_TO_INSERT_TO_CONTAMINATION;
    }
}
Exemple #23
0
int main(int argc, char *argv[]) {
    st_setLogLevelFromString(argv[1]);
    st_logDebug("Set up logging\n");

    stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(argv[2]);
    CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0);
    stKVDatabaseConf_destruct(kvDatabaseConf);
    st_logDebug("Set up the flower disk\n");

    Name flowerName = cactusMisc_stringToName(argv[3]);
    Flower *flower = cactusDisk_getFlower(cactusDisk, flowerName);

    int64_t totalBases = flower_getTotalBaseLength(flower);
    int64_t totalEnds = flower_getEndNumber(flower);
    int64_t totalFreeEnds = flower_getFreeStubEndNumber(flower);
    int64_t totalAttachedEnds = flower_getAttachedStubEndNumber(flower);
    int64_t totalCaps = flower_getCapNumber(flower);
    int64_t totalBlocks = flower_getBlockNumber(flower);
    int64_t totalGroups = flower_getGroupNumber(flower);
    int64_t totalChains = flower_getChainNumber(flower);
    int64_t totalLinkGroups = 0;
    int64_t maxEndDegree = 0;
    int64_t maxAdjacencyLength = 0;
    int64_t totalEdges = 0;

    Flower_EndIterator *endIt = flower_getEndIterator(flower);
    End *end;
    while((end = flower_getNextEnd(endIt)) != NULL) {
        assert(end_getOrientation(end));
        if(end_getInstanceNumber(end) > maxEndDegree) {
            maxEndDegree = end_getInstanceNumber(end);
        }
        stSortedSet *ends = stSortedSet_construct();
        End_InstanceIterator *capIt = end_getInstanceIterator(end);
        Cap *cap;
        while((cap = end_getNext(capIt)) != NULL) {
            if(cap_getSequence(cap) != NULL) {
                Cap *adjacentCap = cap_getAdjacency(cap);
                assert(adjacentCap != NULL);
                End *adjacentEnd = end_getPositiveOrientation(cap_getEnd(adjacentCap));
                stSortedSet_insert(ends, adjacentEnd);
                int64_t adjacencyLength = cap_getCoordinate(cap) - cap_getCoordinate(adjacentCap);
                if(adjacencyLength < 0) {
                    adjacencyLength *= -1;
                }
                assert(adjacencyLength >= 1);
                if(adjacencyLength >= maxAdjacencyLength) {
                    maxAdjacencyLength = adjacencyLength;
                }
            }
        }
        end_destructInstanceIterator(capIt);
        totalEdges += stSortedSet_size(ends);
        if(stSortedSet_search(ends, end) != NULL) { //This ensures we count self edges twice, so that the division works.
            totalEdges += 1;
        }
        stSortedSet_destruct(ends);
    }
    assert(totalEdges % 2 == 0);
    flower_destructEndIterator(endIt);

    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    Group *group;
    while((group = flower_getNextGroup(groupIt)) != NULL) {
        if(group_getLink(group) != NULL) {
            totalLinkGroups++;
        }
    }
    flower_destructGroupIterator(groupIt);

    printf("flower name: %" PRIi64 " total bases: %" PRIi64 " total-ends: %" PRIi64 " total-caps: %" PRIi64 " max-end-degree: %" PRIi64 " max-adjacency-length: %" PRIi64 " total-blocks: %" PRIi64 " total-groups: %" PRIi64 " total-edges: %" PRIi64 " total-free-ends: %" PRIi64 " total-attached-ends: %" PRIi64 " total-chains: %" PRIi64 " total-link groups: %" PRIi64 "\n",
            flower_getName(flower), totalBases, totalEnds, totalCaps, maxEndDegree, maxAdjacencyLength, totalBlocks, totalGroups, totalEdges/2, totalFreeEnds, totalAttachedEnds, totalChains, totalLinkGroups);

    return 0;
}
int mapGene(Cap *cap, int level, int exon, struct bed *gene, FILE *fileHandle){
   /*
    *Following cactus adjacencies, starting from 'cap', find regions that overlap with 
    *exons of input gene. Report chain relations of these regions with the exons.
    *cap: current cap. Level = chain level. exon = exon number. gene = bed record of gene
    */
   int64_t exonStart, exonEnd;
   if(isStubCap(cap)){
      Group *group = end_getGroup(cap_getEnd(cap));
      Flower *nestedFlower = group_getNestedFlower(group);
      if(nestedFlower != NULL){//recursive call
         Cap *childCap = flower_getCap(nestedFlower, cap_getName(cap));
         assert(childCap != NULL);
         exon = mapGene(childCap, level + 1, exon, gene, fileHandle);
         exonStart = gene->chromStarts->list[exon] + gene->chromStart;
         exonEnd = exonStart + gene->blockSizes->list[exon];
      }
   }

   cap = cap_getAdjacency(cap);
   Cap *nextcap;
   int64_t capCoor;
   exonStart = gene->chromStarts->list[exon] + gene->chromStart;
   exonEnd = exonStart + gene->blockSizes->list[exon];
   Block *block = end_getBlock(cap_getEnd(cap));  
 
   if(block == NULL){
      moveCapToNextBlock(&cap);
   }
   while(!isStubCap(cap) && exon < gene->blockCount){
      End *cend = cap_getEnd(cap);
      capCoor = cap_getCoordinate(cap);//Cap coordinate is always the coordinate on + strand
      nextcap = cap_getAdjacency(cap_getOtherSegmentCap(cap));
      st_logInfo("capCoor: %d, nextCap: %d, eStart: %d, eEnd: %d. Exon: %d\n", 
                  capCoor, cap_getCoordinate(nextcap), exonStart, exonEnd, exon);

      //keep moving if nextBlock Start is still upstream of current exon
      if(cap_getCoordinate(nextcap) <= exonStart){
         moveCapToNextBlock(&cap);
         st_logInfo("Still upstream, nextcap <= exonStart. Move to next chainBlock\n");
      }else if(capCoor >= exonEnd){//Done with current exon, move to next
         st_logInfo("Done with current exon, move to next one\n\n");
         fprintf(fileHandle, "\t\t</exon>\n");//end previous exon
         exon++;
         if(exon < gene->blockCount){
            exonStart = gene->chromStarts->list[exon] + gene->chromStart;
            exonEnd = exonStart + gene->blockSizes->list[exon];
            fprintf(fileHandle, "\t\t<exon id=\"%d\" start=\"%" PRIi64 "\" end=\"%" PRIi64 "\">\n", exon, exonStart, exonEnd);
         }
      }else{//current exon overlaps with current block Or with lower level flower
         Cap *oppcap = cap_getOtherSegmentCap(cap);
         st_logInfo("Current exon overlaps with current block or with lower flower\n");
         if(cap_getCoordinate(oppcap) >= exonStart && exonEnd > capCoor){
            mapBlockToExon(cap, level, fileHandle);
            if(exonEnd <= cap_getCoordinate(oppcap) + 1){
               st_logInfo("Done with current exon, move to next one\n\n");
               fprintf(fileHandle, "\t\t</exon>\n");//end previous exon
               exon++;
	       if(exon < gene->blockCount){
		  exonStart = gene->chromStarts->list[exon] + gene->chromStart;
		  exonEnd = exonStart + gene->blockSizes->list[exon];
		  fprintf(fileHandle, "\t\t<exon id=\"%d\" start=\"%" PRIi64 "\" end=\"%" PRIi64 "\">\n", exon, exonStart, exonEnd);
	       }
               continue;
            }
         }
         //Traverse lower level flowers if exists
         Group *group = end_getGroup(end_getOtherBlockEnd(cend));
         Flower *nestedFlower = group_getNestedFlower(group);
         if(nestedFlower != NULL){//recursive call
            Cap *childCap = flower_getCap(nestedFlower, cap_getName(cap_getOtherSegmentCap(cap)));
            assert(childCap != NULL);
            exon = mapGene(childCap, level + 1, exon, gene, fileHandle);
            exonStart = gene->chromStarts->list[exon] + gene->chromStart;
            exonEnd = exonStart + gene->blockSizes->list[exon];
         }
         moveCapToNextBlock(&cap);
      }
   }
   return exon;
}