/*
 * Recursive function which fills a givenlist with the
 * connected nodes within a module
 */
static void buildFaces_fillTopNodeList(Cap * cap, stList *list,
        stHash *liftedEdgesTable) {
    stList *liftedEdges;
    int64_t index;

    // Limit of recursion
    if (stList_contains(list, cap))
        return;

    // Actual filling
    st_logInfo("Adding cap %p to face\n", cap);
    stList_append(list, cap);

    // Recursion through lifted edges
    if ((liftedEdges = stHash_search(liftedEdgesTable, cap)))
        for (index = 0; index < stList_length(liftedEdges); index++)
            buildFaces_fillTopNodeList(
                    ((LiftedEdge *) stList_get(liftedEdges, index))->destination,
                   list, liftedEdgesTable);

    // Recursion through adjacency
    if (cap_getAdjacency(cap))
        buildFaces_fillTopNodeList(cap_getAdjacency(cap),list,
                liftedEdgesTable);
}
bool getCapGetAtEndOfPath(Cap *cap, Cap **pathEndCap,
        int64_t *pathLength, int64_t *nCount, stList *haplotypeEventStrings, stList *contaminationEventStrings) {
    //Account for length of adjacency
    *pathLength += getTerminalAdjacencyLength(cap);
    *nCount += getNumberOfNsInAdjacency(cap);

    Segment *segment = getAdjacentCapsSegment(cap);
    if (segment == NULL) {
        *pathEndCap = cap_getAdjacency(getTerminalCap(cap));
        assert(*pathEndCap != NULL);
        return 0;
    }
    Cap *adjacentCap = cap_getSide(cap) ? segment_get3Cap(segment)
    : segment_get5Cap(segment);
    assert(
            cap_getName(adjacentCap) == cap_getName(
                    cap_getAdjacency(getTerminalCap(cap))));

    End *adjacentEnd = cap_getEnd(adjacentCap);
    if (hasCapInEvents(adjacentEnd, contaminationEventStrings) || hasCapInEvents(adjacentEnd, haplotypeEventStrings)) { //hasCapNotInEvent(adjacentEnd, event_getHeader(cap_getEvent(cap)))) { //isContaminationEnd(adjacentEnd) || isHaplotypeEnd(adjacentEnd)) {
        *pathEndCap = adjacentCap;
        return 1;
    }
    *pathLength += segment_getLength(segment);
    *nCount += getNumberOfNsInSegment(segment);
    return getCapGetAtEndOfPath(cap_getOtherSegmentCap(adjacentCap),
            pathEndCap, pathLength, nCount, haplotypeEventStrings, contaminationEventStrings);
}
bool trueAdjacency(Cap *cap, stList *eventStrings) {
	if(getTerminalAdjacencyLength(cap) > 0) {
        return 0;
    }
	cap = getTerminalCap(cap);
    assert(cap != NULL);
    Cap *otherCap = cap_getAdjacency(cap);
    assert(otherCap != NULL);
    assert(cap_getAdjacency(otherCap) == cap);
    //So is the adjacency present in one of the haplotypes? That's what we're going to answer..
    End *otherEnd = end_getPositiveOrientation(cap_getEnd(otherCap));
    End_InstanceIterator *endInstanceIt = end_getInstanceIterator(cap_getEnd(cap));
    Cap *cap2;
    while ((cap2 = end_getNext(endInstanceIt)) != NULL) {
        Cap *otherCap2 = cap_getAdjacency(cap2);
        assert(otherCap2 != NULL);
        if (otherEnd == end_getPositiveOrientation(cap_getEnd(otherCap2))) {
            //const char *eventName = event_getHeader(cap_getEvent(cap2));
            assert(event_getHeader(cap_getEvent(cap2)) == event_getHeader(
                            cap_getEvent(otherCap2)));
            if (capHasGivenEvents(cap2, eventStrings)) { //strcmp(eventName, "hapA1") == 0 || strcmp(eventName, "hapA2") == 0) {
                if(getTerminalAdjacencyLength(cap2) == 0) {
                    end_destructInstanceIterator(endInstanceIt);
                    return 1;
                }
            }
        }
    }
    end_destructInstanceIterator(endInstanceIt);
    return 0;
}
/*
 * Constructs a face from a given Cap
 */
static void buildFaces_constructFromCap(Cap * startingCap,
        stHash *liftedEdgesTable, Flower * flower) {
    Face *face = face_construct(flower);
    stList *topNodes = stList_construct3(16, NULL);
    stList *liftedEdges;
    Cap *cap, *bottomNode, *ancestor;
    int64_t index, index2;

    printf("Constructing new face");

    // Establishlist of top nodes
    buildFaces_fillTopNodeList(startingCap, topNodes, liftedEdgesTable);

#ifndef NDEBUG
    // What, no top nodes!?
    if (stList_length(topNodes) == 0)
        abort();
#endif

    // Initialize data structure
    face_allocateSpace(face, stList_length(topNodes));

    // For every top node
    for (index = 0; index < stList_length(topNodes); index++) {
        cap = stList_get(topNodes, index);
        face_setTopNode(face, index, cap);
        liftedEdges = stHash_search(liftedEdgesTable, cap);

        if (!liftedEdges) {
            face_setBottomNodeNumber(face, index, 0);
            continue;
        }

        face_setBottomNodeNumber(face, index, stList_length(liftedEdges));
        // For every bottom node of that top node
        for (index2 = 0; index2 < stList_length(liftedEdges); index2++) {
            bottomNode
                    = ((LiftedEdge *) stList_get(liftedEdges, index2))->bottomNode;
            face_addBottomNode(face, index, bottomNode);
            ancestor = cap_getTopCap(cap_getPositiveOrientation(
                    cap_getAdjacency(bottomNode)));
            if (cap_getAdjacency(cap) != ancestor)
                face_setDerivedDestination(face, index, index2, ancestor);
            else
                face_setDerivedDestination(face, index, index2, NULL);

#ifndef NDEBUG
            // If bottom nodes part of top nodes
            assert(!stList_contains(topNodes, cap_getPositiveOrientation(
                    ((LiftedEdge*) stList_get(liftedEdges, index2))->bottomNode)));
#endif
        }
    }

    // Clean up
    stList_destruct(topNodes);
}
static int64_t setCoordinates(Flower *flower, MetaSequence *metaSequence, Cap *cap, int64_t coordinate) {
    /*
     * Sets the coordinates of the reference thread and sets the bases of the actual sequence
     * that of the consensus.
     */
    Sequence *sequence = flower_getSequence(flower, metaSequence_getName(metaSequence));
    if (sequence == NULL) {
        sequence = sequence_construct(metaSequence, flower);
    }
    while (1) {
        assert(cap_getStrand(cap));
        assert(!cap_getSide(cap));
        Cap *adjacentCap = cap_getAdjacency(cap);
        assert(adjacentCap != NULL);
        assert(cap_getStrand(adjacentCap));
        assert(cap_getSide(adjacentCap));
        int64_t adjacencyLength = cap_getCoordinate(cap);
        assert(adjacencyLength == cap_getCoordinate(adjacentCap));
        assert(adjacencyLength != INT64_MAX);
        assert(adjacencyLength >= 0);
        cap_setCoordinates(cap, coordinate, 1, sequence);
        coordinate += adjacencyLength + 1;
        cap_setCoordinates(adjacentCap, coordinate, 1, sequence);
        //Traverse any block..
        if ((cap = cap_getOtherSegmentCap(adjacentCap)) == NULL) {
            break;
        }
        coordinate += segment_getLength(cap_getSegment(adjacentCap)) - 1;
    }
    return coordinate;
}
static enum CapCode getHaplotypeSwitchCode(Cap *cap, stList *eventStrings) {
    Cap *adjacentCap = cap_getAdjacency(getTerminalCap(cap));
    assert(adjacentCap != NULL);
    End *end = cap_getEnd(cap);
    End *adjacentEnd = cap_getEnd(adjacentCap);
    stSortedSet *eventStringsForEnd1 = getEventStrings(end, eventStrings);
    stSortedSet *eventStringsForEnd2 = getEventStrings(adjacentEnd, eventStrings);

    assert(stSortedSet_size(eventStringsForEnd1) > 0);
    assert(stSortedSet_size(eventStringsForEnd2) > 0);

    stSortedSet *intersectionOfEventStrings = stSortedSet_getIntersection(
            eventStringsForEnd1, eventStringsForEnd2);

    enum CapCode code1 = (stSortedSet_size(intersectionOfEventStrings)
            != stSortedSet_size(eventStringsForEnd1) || stSortedSet_size(
                    intersectionOfEventStrings)
            != stSortedSet_size(eventStringsForEnd2)) ? HAP_SWITCH
    : HAP_NOTHING;

    stSortedSet_destruct(eventStringsForEnd1);
    stSortedSet_destruct(eventStringsForEnd2);
    stSortedSet_destruct(intersectionOfEventStrings);

    return code1;
}
static int64_t traceThreadLength(Cap *cap, Cap **terminatingCap) {
    /*
     * Gets the length in bases of the thread in the flower, starting from a given attached stub cap.
     * The thread length includes the lengths of adjacencies that it contains.
     *
     * Terminating cap is initialised with the final cap on the thread from cap.
     */
    assert(end_isStubEnd(cap_getEnd(cap)));
    int64_t threadLength = 0;
    while (1) {
        assert(cap_getCoordinate(cap) != INT64_MAX);
        int64_t adjacencyLength = cap_getCoordinate(cap);
        threadLength += adjacencyLength;
        Cap *adjacentCap = cap_getAdjacency(cap);
        assert(adjacentCap != NULL);
        assert(adjacencyLength == cap_getCoordinate(adjacentCap));
        //Traverse any block..
        if (cap_getSegment(adjacentCap) != NULL) {
            threadLength += segment_getLength(cap_getSegment(adjacentCap));
            cap = cap_getOtherSegmentCap(adjacentCap);
            assert(cap != NULL);
        } else {
            assert(end_isStubEnd(cap_getEnd(adjacentCap)));
            *terminatingCap = adjacentCap;
            return threadLength;
        }
    }
    return 1;
}
/*
 * Fill uplist with bottom nodes for top node
 */
static void buildFaces_computeLiftedEdgesAtTopNode(Cap * cap, stList * liftedEdges) {
    int64_t childIndex;
    LiftedEdge * liftedEdge;

    // Termination
    if (cap_getAdjacency(cap)) {
	liftedEdge = st_malloc(sizeof(LiftedEdge));
	liftedEdge->bottomNode = cap_getPositiveOrientation(cap);
	liftedEdge->destination = cap_getPositiveOrientation(cap_getTopCap(cap_getAdjacency(cap)));
	stList_append(liftedEdges, liftedEdge);
	return;
    }
    
    // Recursion through children
    for (childIndex = 0; childIndex < cap_getChildNumber(cap); childIndex++) 
	buildFaces_computeLiftedEdgesAtTopNode(cap_getChild(cap, childIndex), liftedEdges);
}
/*
 * Fill in a hashtable which to every node associates
 * alist of lifted edges
 */
static stHash *buildFaces_computeLiftedEdges(Flower * flower) {
    stHash *liftedEdgesTable = stHash_construct3(buildFaces_hashfunction,
            buildFaces_key_eq_fn, NULL, buildFaces_destructValue);
    Flower_CapIterator *iter = flower_getCapIterator(flower);
    Cap *cap, *attachedAncestor;
    Cap *adjacency, *adjacencyAncestor;
    stList *liftedEdges;
    LiftedEdge *liftedEdge;

    // Iterate through potential bottom nodes
    while ((cap = flower_getNextCap(iter))) {
        // ... check if connected
        if ((adjacency = cap_getAdjacency(cap))) {
            // ... lift
            attachedAncestor = cap_getTopCap(cap);
            adjacencyAncestor = cap_getTopCap(cap_getPositiveOrientation(
                    adjacency));

#ifndef NDEBUG
            assert((attachedAncestor && adjacencyAncestor) || (!attachedAncestor && !adjacencyAncestor));
#endif

            // If root node
            if (attachedAncestor == NULL)
                continue;

            // ... create lifted edge
            liftedEdge = st_malloc(sizeof(LiftedEdge));
            liftedEdge->destination = adjacencyAncestor;
            liftedEdge->bottomNode = cap;

#ifndef NDEBUG
            // Self loop
            if (adjacencyAncestor == attachedAncestor)
                abort();
#endif

            // ... add it to the hashtable
            if ((liftedEdges
                    = stHash_search(liftedEdgesTable, attachedAncestor))) {
                stList_append(liftedEdges, liftedEdge);
            } else {
                liftedEdges = stList_construct3(2,
                        buildFaces_stList_destructElem);
                stList_append(liftedEdges, liftedEdge);
                stHash_insert(liftedEdgesTable, attachedAncestor, liftedEdges);
            }
        }
    }

    flower_destructCapIterator(iter);
    return liftedEdgesTable;
}
static void setAdjacencyLengthsAndRecoverNewCapsAndBrokenAdjacencies(Cap *cap, stList *recoveredCaps) {
    /*
     * Sets the coordinates of the caps to be equal to the length of the adjacency sequence between them.
     * Used to build the reference sequence bottom up.
     *
     * One complexity is that a reference thread between the two caps
     * in each flower f may be broken into two in the children of f.
     * Therefore, for each flower f first identify attached stub ends present in the children of f that are
     * not present in f and copy them into f, reattaching the reference caps as needed.
     */
    while (1) {
        Cap *adjacentCap = cap_getAdjacency(cap);
        assert(adjacentCap != NULL);
        assert(cap_getCoordinate(cap) == INT64_MAX);
        assert(cap_getCoordinate(adjacentCap) == INT64_MAX);
        assert(cap_getStrand(cap) == cap_getStrand(adjacentCap));
        assert(cap_getSide(cap) != cap_getSide(adjacentCap));
        Group *group = end_getGroup(cap_getEnd(cap));
        assert(group != NULL);
        if (!group_isLeaf(group)) { //Adjacency is not terminal, so establish its sequence.
            Flower *nestedFlower = group_getNestedFlower(group);
            Cap *nestedCap = flower_getCap(nestedFlower, cap_getName(cap));
            assert(nestedCap != NULL);
            Cap *nestedAdjacentCap = flower_getCap(nestedFlower, cap_getName(adjacentCap));
            assert(nestedAdjacentCap != NULL);
            Cap *breakerCap;
            int64_t adjacencyLength = traceThreadLength(nestedCap, &breakerCap);
            assert(cap_getOrientation(nestedAdjacentCap));
            if (cap_getPositiveOrientation(breakerCap) != nestedAdjacentCap) { //The thread is broken at the lower level.
                //Copy cap into higher level graph.
                breakerCap = copyCapToParent(breakerCap, recoveredCaps);
                assert(cap_getSide(breakerCap));
                cap_makeAdjacent(cap, breakerCap);
                setAdjacencyLength(cap, breakerCap, adjacencyLength);
                adjacencyLength = traceThreadLength(nestedAdjacentCap, &breakerCap);
                assert(cap_getPositiveOrientation(breakerCap) != cap);
                breakerCap = copyCapToParent(breakerCap, recoveredCaps);
                assert(!cap_getSide(breakerCap));
                cap_makeAdjacent(breakerCap, adjacentCap);
                setAdjacencyLength(adjacentCap, breakerCap, adjacencyLength);
            } else { //The thread is not broken at the lower level
                setAdjacencyLength(cap, adjacentCap, adjacencyLength);
            }
        } else {
            //Set the coordinates of the caps to the adjacency size
            setAdjacencyLength(cap, adjacentCap, 0);
        }
        if ((cap = cap_getOtherSegmentCap(adjacentCap)) == NULL) {
            break;
        }
    }
}
/*
 * Recursive function which fills a givenlist with the
 * connected nodes within a module and fills their lifted
 * edges in the same pass
 */
static void buildFaces_fillTopNodeList2(Cap * cap, stList *list,
        stHash *liftedEdgesTable) {
    stList *liftedEdges = stList_construct3(2,
                        buildFaces_stList_destructElem);
    int64_t index;

    // Orientation check
    cap = cap_getPositiveOrientation(cap);

    // Limit of recursion
    if (stList_contains(list, cap))
        return;

    // Actual filling
    st_logInfo("Adding cap %p to face\n", cap);
    stList_append(list, cap);

    // Compute lifted edges
    for (index = 0; index < cap_getChildNumber(cap); index++) 
	buildFaces_computeLiftedEdgesAtTopNode(cap_getChild(cap, index), liftedEdges);

    // If emptylist...
    if (stList_length(liftedEdges) == 0) 
	stList_destruct(liftedEdges);
    // Recursion through lifted edges
    else {
	stHash_insert(liftedEdgesTable, cap, liftedEdges);
        for (index = 0; index < stList_length(liftedEdges); index++)
            buildFaces_fillTopNodeList2(
                    ((LiftedEdge *) stList_get(liftedEdges, index))->destination,
                   list, liftedEdgesTable);
    }

    // Recursion through adjacency
    if (cap_getAdjacency(cap))
        buildFaces_fillTopNodeList2(cap_getAdjacency(cap),list,
                liftedEdgesTable);
}
Beispiel #12
0
int64_t flower_getTotalBaseLength(Flower *flower) {
    /*
     * The implementation of this function is very like that in group_getTotalBaseLength, with a few differences. Consider merging them.
     */
    Flower_EndIterator *endIterator = flower_getEndIterator(flower);
    End *end;
    int64_t totalLength = 0;
    while ((end = flower_getNextEnd(endIterator)) != NULL) {
        if (!end_isBlockEnd(end)) {
            End_InstanceIterator *instanceIterator = end_getInstanceIterator(end);
            Cap *cap;
            while ((cap = end_getNext(instanceIterator)) != NULL) {
                cap = cap_getStrand(cap) ? cap : cap_getReverse(cap);
                if (!cap_getSide(cap) && cap_getSequence(cap) != NULL) {
                    Cap *cap2 = cap_getAdjacency(cap);
                    assert(cap2 != NULL);
                    while (end_isBlockEnd(cap_getEnd(cap2))) {
                        Segment *segment = cap_getSegment(cap2);
                        assert(segment != NULL);
                        assert(segment_get5Cap(segment) == cap2);
                        cap2 = cap_getAdjacency(segment_get3Cap(segment));
                        assert(cap2 != NULL);
                        assert(cap_getStrand(cap2));
                        assert(cap_getSide(cap2));
                    }
                    assert(cap_getStrand(cap2));
                    assert(cap_getSide(cap2));
                    int64_t length = cap_getCoordinate(cap2) - cap_getCoordinate(cap) - 1;
                    assert(length >= 0);
                    totalLength += length;
                }
            }
            end_destructInstanceIterator(instanceIterator);
        }
    }
    flower_destructEndIterator(endIterator);
    return totalLength;
}
Beispiel #13
0
Segment *block_splitP(Segment *segment,
		Block *leftBlock, Block *rightBlock) {
	Segment *leftSegment = segment_getSequence(segment) != NULL
				? segment_construct2(leftBlock, segment_getStart(segment), segment_getStrand(segment), segment_getSequence(segment))
				: segment_construct(leftBlock, segment_getEvent(segment));
				Segment *rightSegment = segment_getSequence(segment) != NULL
				? segment_construct2(rightBlock, segment_getStart(segment) + block_getLength(leftBlock), segment_getStrand(segment), segment_getSequence(segment))
						: segment_construct(rightBlock, segment_getEvent(segment));
	//link together.
	cap_makeAdjacent(segment_get3Cap(leftSegment), segment_get5Cap(rightSegment));
	//update adjacencies.
	Cap *_5Cap = segment_get5Cap(segment);
	Cap *new5Cap = segment_get5Cap(leftSegment);
	Cap *_3Cap = segment_get3Cap(segment);
	Cap *new3Cap = segment_get3Cap(rightSegment);
	if(cap_getAdjacency(_5Cap) != NULL) {
		cap_makeAdjacent(cap_getAdjacency(_5Cap), new5Cap);
	}
	if(cap_getAdjacency(_3Cap) != NULL) {
		cap_makeAdjacent(cap_getAdjacency(_3Cap), new3Cap);
	}
	return leftSegment;
}
void topDown(Flower *flower, Name referenceEventName) {
    /*
     * Run on each flower, top down. Sets the coordinates of each reference cap to the correct
     * sequence, and sets the bases of the reference sequence to be consensus bases.
     */
    Flower_EndIterator *endIt = flower_getEndIterator(flower);
    End *end;
    while ((end = flower_getNextEnd(endIt)) != NULL) {
        Cap *cap = getCapForReferenceEvent(end, referenceEventName); //The cap in the reference
        if (cap != NULL) {
            cap = cap_getStrand(cap) ? cap : cap_getReverse(cap);
            if (!cap_getSide(cap)) {
                assert(cap_getCoordinate(cap) != INT64_MAX);
                Sequence *sequence = cap_getSequence(cap);
                assert(sequence != NULL);
                Group *group = end_getGroup(end);
                if (!group_isLeaf(group)) {
                    Flower *nestedFlower = group_getNestedFlower(group);
                    Cap *nestedCap = flower_getCap(nestedFlower, cap_getName(cap));
                    assert(nestedCap != NULL);
                    nestedCap = cap_getStrand(nestedCap) ? nestedCap : cap_getReverse(nestedCap);
                    assert(cap_getStrand(nestedCap));
                    assert(!cap_getSide(nestedCap));
                    int64_t endCoordinate = setCoordinates(nestedFlower, sequence_getMetaSequence(sequence),
                                                           nestedCap, cap_getCoordinate(cap));
                    (void) endCoordinate;
                    assert(endCoordinate == cap_getCoordinate(cap_getAdjacency(cap)));
                    assert(endCoordinate
                           == cap_getCoordinate(
                               flower_getCap(nestedFlower, cap_getName(cap_getAdjacency(cap)))));
                }
            }
        }
    }
    flower_destructEndIterator(endIt);
}
Beispiel #15
0
void testCap_adjacent(CuTest* testCase) {
    cactusCapTestSetup();
    CuAssertTrue(testCase, cap_getAdjacency(leaf1Cap) == NULL);
    CuAssertTrue(testCase, cap_getAdjacency(leaf3Cap) == NULL);
    cap_makeAdjacent(leaf1Cap, leaf3Cap);
    CuAssertTrue(testCase, cap_getAdjacency(leaf1Cap) == cap_getReverse(leaf3Cap));
    CuAssertTrue(testCase, cap_getAdjacency(leaf3Cap) == cap_getReverse(leaf1Cap));
    CuAssertTrue(testCase, cap_getAdjacency(cap_getReverse(leaf1Cap)) == leaf3Cap);
    CuAssertTrue(testCase, cap_getAdjacency(cap_getReverse(leaf3Cap)) == leaf1Cap);
    cactusCapTestTeardown();
}
Beispiel #16
0
stList *getContigPaths(Flower *flower, const char *eventString, stList *eventStrings) {
    stList *maximalHaplotypePaths = stList_construct3(0,
            (void(*)(void *)) stList_destruct);
    stSortedSet *segmentSet = stSortedSet_construct();
    getMaximalHaplotypePathsP(flower, maximalHaplotypePaths, segmentSet, eventString, eventStrings);

    //Do some debug checks..
    st_logDebug("We have %" PRIi64 " maximal haplotype paths\n", stList_length(
            maximalHaplotypePaths));
    getMaximalHaplotypePathsCheck(flower, segmentSet, eventString, eventStrings);
    for (int64_t i = 0; i < stList_length(maximalHaplotypePaths); i++) {
        stList *maximalHaplotypePath = stList_get(maximalHaplotypePaths, i);
        st_logDebug("We have a maximal haplotype path with length %" PRIi64 "\n",
                stList_length(maximalHaplotypePath));
        assert(stList_length(maximalHaplotypePath) > 0);
        Segment *_5Segment = stList_get(maximalHaplotypePath, 0);
        Segment *_3Segment = stList_get(maximalHaplotypePath, stList_length(
                maximalHaplotypePath) - 1);
        if (getAdjacentCapsSegment(segment_get5Cap(_5Segment)) != NULL) {
            assert(!trueAdjacency(segment_get5Cap(_5Segment), eventStrings));
        }
        if (getAdjacentCapsSegment(segment_get3Cap(_3Segment)) != NULL) {
            assert(!trueAdjacency(segment_get3Cap(_3Segment), eventStrings));
        }
        for (int64_t j = 0; j < stList_length(maximalHaplotypePath) - 1; j++) {
            _5Segment = stList_get(maximalHaplotypePath, j);
            _3Segment = stList_get(maximalHaplotypePath, j + 1);
            assert(trueAdjacency(segment_get3Cap(_5Segment), eventStrings));
            assert(trueAdjacency(segment_get5Cap(_3Segment), eventStrings));
            assert(cap_getAdjacency(getTerminalCap(segment_get3Cap(_5Segment)))
                    == getTerminalCap(segment_get5Cap(_3Segment)));
            assert(strcmp(event_getHeader(segment_getEvent(_5Segment)),
                   eventString) == 0);
            assert(strcmp(event_getHeader(segment_getEvent(_3Segment)),
                    eventString) == 0);
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(_5Segment)), eventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get5Cap(_5Segment))));
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(_3Segment)), eventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get5Cap(_3Segment))));
        }
    }

    stSortedSet_destruct(segmentSet);

    return maximalHaplotypePaths;
}
char *getTerminalAdjacencySubString(Cap *cap) {
    if(getTerminalAdjacencyLength_ignoreAdjacencies) {
        return stString_copy("");
    }
    cap = getTerminalCap(cap);
    cap = cap_getStrand(cap) ? cap : cap_getReverse(cap); //This ensures the asserts are as expected.
    Cap *adjacentCap = cap_getAdjacency(cap);
    int64_t i = cap_getCoordinate(cap) - cap_getCoordinate(adjacentCap);
    assert(i != 0);
    if (i > 0) {
        assert(cap_getSide(cap));
        assert(!cap_getSide(adjacentCap));
        return sequence_getString(cap_getSequence(cap),
                cap_getCoordinate(adjacentCap) + 1, i - 1, 1);
    } else {
        assert(cap_getSide(adjacentCap));
        assert(!cap_getSide(cap));
        return sequence_getString(cap_getSequence(cap), cap_getCoordinate(cap) + 1, -i - 1, 1);
    }
}
Beispiel #18
0
static void block_splitP2(Segment *segment,
		Segment *parentLeftSegment,
		Segment *parentRightSegment,
		Block *leftBlock, Block *rightBlock) {
	Segment *leftSegment = block_splitP(segment, leftBlock, rightBlock);
	Segment *rightSegment = cap_getSegment(cap_getAdjacency(segment_get3Cap(leftSegment)));
	if(parentLeftSegment != NULL) {
		assert(parentRightSegment != NULL);
		segment_makeParentAndChild(parentLeftSegment, leftSegment);
		segment_makeParentAndChild(parentRightSegment, rightSegment);
	}
	else {
		assert(parentRightSegment == NULL);
		block_setRootInstance(leftBlock, leftSegment);
		block_setRootInstance(rightBlock, rightSegment);
	}
	int64_t i;
	for(i=0; i<segment_getChildNumber(segment); i++) {
		block_splitP2(segment_getChild(segment, i), leftSegment, rightSegment, leftBlock, rightBlock);
	}
}
Beispiel #19
0
static stList *getSubstringsForFlowers(stList *flowers) {
    /*
     * Get the set of substrings for sequence intervals in the given set of flowers.
     */
    stList *substrings = stList_construct3(0, (void (*)(void *)) substring_destruct);
    for (int64_t i = 0; i < stList_length(flowers); i++) {
        Flower *flower = stList_get(flowers, i);
        Flower_EndIterator *endIt = flower_getEndIterator(flower);
        End *end;
        while ((end = flower_getNextEnd(endIt)) != NULL) {
            if (end_isStubEnd(end)) {
                End_InstanceIterator *instanceIt = end_getInstanceIterator(end);
                Cap *cap;
                while ((cap = end_getNext(instanceIt)) != NULL) {
                    Sequence *sequence;
                    if ((sequence = cap_getSequence(cap)) != NULL) {
                        cap = cap_getStrand(cap) ? cap : cap_getReverse(cap);
                        if (!cap_getSide(cap)) { //We have a sequence interval of interest
                            Cap *adjacentCap = cap_getAdjacency(cap);
                            assert(adjacentCap != NULL);
                            int64_t length = cap_getCoordinate(adjacentCap) - cap_getCoordinate(cap) - 1;
                            assert(length >= 0);
                            if (length > 0) {
                                stList_append(substrings,
                                        substring_construct(sequence_getMetaSequence(sequence)->stringName,
                                                cap_getCoordinate(cap) + 1 - sequence_getStart(sequence), length));
                            }
                        }
                    }
                }
                end_destructInstanceIterator(instanceIt);
            }
        }
        flower_destructEndIterator(endIt);
    }
    return substrings;
}
Beispiel #20
0
stSortedSet *makeEndAlignment(StateMachine *sM, End *end, int64_t spanningTrees, int64_t maxSequenceLength,
        bool useProgressiveMerging, float gapGamma,
        PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters) {
    //Make an alignment of the sequences in the ends

    //Get the adjacency sequences to be aligned.
    Cap *cap;
    End_InstanceIterator *it = end_getInstanceIterator(end);
    stList *sequences = stList_construct3(0, (void (*)(void *))adjacencySequence_destruct);
    stList *seqFrags = stList_construct3(0, (void (*)(void *))seqFrag_destruct);
    stHash *endInstanceNumbers = stHash_construct2(NULL, free);
    while((cap = end_getNext(it)) != NULL) {
        if(cap_getSide(cap)) {
            cap = cap_getReverse(cap);
        }
        AdjacencySequence *adjacencySequence = adjacencySequence_construct(cap, maxSequenceLength);
        stList_append(sequences, adjacencySequence);
        assert(cap_getAdjacency(cap) != NULL);
        End *otherEnd = end_getPositiveOrientation(cap_getEnd(cap_getAdjacency(cap)));
        stList_append(seqFrags, seqFrag_construct(adjacencySequence->string, 0, end_getName(otherEnd)));
        //Increase count of seqfrags with a given end.
        int64_t *c = stHash_search(endInstanceNumbers, otherEnd);
        if(c == NULL) {
            c = st_calloc(1, sizeof(int64_t));
            assert(*c == 0);
            stHash_insert(endInstanceNumbers, otherEnd, c);
        }
        (*c)++;
    }
    end_destructInstanceIterator(it);

    //Get the alignment.
    MultipleAlignment *mA = makeAlignment(sM, seqFrags, spanningTrees, 100000000, useProgressiveMerging, gapGamma, pairwiseAlignmentBandingParameters);

    //Build an array of weights to reweight pairs in the alignment.
    int64_t *pairwiseAlignmentsPerSequenceNonCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t));
    int64_t *pairwiseAlignmentsPerSequenceCommonEnds = st_calloc(stList_length(seqFrags), sizeof(int64_t));
    //First build array on number of pairwise alignments to each sequence, distinguishing alignments between sequences sharing
    //common ends.
    for(int64_t i=0; i<stList_length(mA->chosenPairwiseAlignments); i++) {
        stIntTuple *pairwiseAlignment = stList_get(mA->chosenPairwiseAlignments, i);
        int64_t seq1 = stIntTuple_get(pairwiseAlignment, 1);
        int64_t seq2 = stIntTuple_get(pairwiseAlignment, 2);
        assert(seq1 != seq2);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seq1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seq2);
        int64_t *pairwiseAlignmentsPerSequence = seqFrag1->rightEndId == seqFrag2->rightEndId
                ? pairwiseAlignmentsPerSequenceCommonEnds : pairwiseAlignmentsPerSequenceNonCommonEnds;
        pairwiseAlignmentsPerSequence[seq1]++;
        pairwiseAlignmentsPerSequence[seq2]++;
    }
    //Now calculate score adjustments.
    double *scoreAdjustmentsNonCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double));
    double *scoreAdjustmentsCommonEnds = st_malloc(stList_length(seqFrags) * sizeof(double));
    for(int64_t i=0; i<stList_length(seqFrags); i++) {
        SeqFrag *seqFrag = stList_get(seqFrags, i);
        End *otherEnd = flower_getEnd(end_getFlower(end), seqFrag->rightEndId);
        assert(otherEnd != NULL);
        assert(stHash_search(endInstanceNumbers, otherEnd) != NULL);
        int64_t commonInstanceNumber = *(int64_t *)stHash_search(endInstanceNumbers, otherEnd);
        int64_t nonCommonInstanceNumber = stList_length(seqFrags) - commonInstanceNumber;

        assert(commonInstanceNumber > 0 && nonCommonInstanceNumber >= 0);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] <= nonCommonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceNonCommonEnds[i] >= 0);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[i] < commonInstanceNumber);
        assert(pairwiseAlignmentsPerSequenceCommonEnds[i] >= 0);

        //scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber + commonInstanceNumber - 1)/(pairwiseAlignmentsPerSequenceNonCommonEnds[i] + pairwiseAlignmentsPerSequenceCommonEnds[i]);
        //scoreAdjustmentsCommonEnds[i] = scoreAdjustmentsNonCommonEnds[i];
        if(pairwiseAlignmentsPerSequenceNonCommonEnds[i] > 0) {
            scoreAdjustmentsNonCommonEnds[i] = ((double)nonCommonInstanceNumber)/pairwiseAlignmentsPerSequenceNonCommonEnds[i];
            assert(scoreAdjustmentsNonCommonEnds[i] >= 1.0);
            assert(scoreAdjustmentsNonCommonEnds[i] <= nonCommonInstanceNumber);
        }
        else {
            scoreAdjustmentsNonCommonEnds[i] = INT64_MIN;
        }
        if(pairwiseAlignmentsPerSequenceCommonEnds[i] > 0) {
            scoreAdjustmentsCommonEnds[i] = ((double)commonInstanceNumber-1)/pairwiseAlignmentsPerSequenceCommonEnds[i];
            assert(scoreAdjustmentsCommonEnds[i] >= 1.0);
            assert(scoreAdjustmentsCommonEnds[i] <= commonInstanceNumber-1);
        }
        else {
            scoreAdjustmentsCommonEnds[i] = INT64_MIN;
        }
    }

	//Convert the alignment pairs to an alignment of the caps..
    stSortedSet *sortedAlignment =
                stSortedSet_construct3((int (*)(const void *, const void *))alignedPair_cmpFn,
                (void (*)(void *))alignedPair_destruct);
    while(stList_length(mA->alignedPairs) > 0) {
        stIntTuple *alignedPair = stList_pop(mA->alignedPairs);
        assert(stIntTuple_length(alignedPair) == 5);
        int64_t seqIndex1 = stIntTuple_get(alignedPair, 1);
        int64_t seqIndex2 = stIntTuple_get(alignedPair, 3);
        AdjacencySequence *i = stList_get(sequences, seqIndex1);
        AdjacencySequence *j = stList_get(sequences, seqIndex2);
        assert(i != j);
        int64_t offset1 = stIntTuple_get(alignedPair, 2);
        int64_t offset2 = stIntTuple_get(alignedPair, 4);
        int64_t score = stIntTuple_get(alignedPair, 0);
        if(score <= 0) { //Happens when indel probs are included
            score = 1; //This is the minimum
        }
        assert(score > 0 && score <= PAIR_ALIGNMENT_PROB_1);
        SeqFrag *seqFrag1 = stList_get(seqFrags, seqIndex1);
        SeqFrag *seqFrag2 = stList_get(seqFrags, seqIndex2);
        assert(seqFrag1 != seqFrag2);
        double *scoreAdjustments = seqFrag1->rightEndId == seqFrag2->rightEndId ? scoreAdjustmentsCommonEnds : scoreAdjustmentsNonCommonEnds;
        assert(scoreAdjustments[seqIndex1] != INT64_MIN);
        assert(scoreAdjustments[seqIndex2] != INT64_MIN);
        AlignedPair *alignedPair2 = alignedPair_construct(
                i->subsequenceIdentifier, i->start + (i->strand ? offset1 : -offset1), i->strand,
                j->subsequenceIdentifier, j->start + (j->strand ? offset2 : -offset2), j->strand,
                score*scoreAdjustments[seqIndex1], score*scoreAdjustments[seqIndex2]); //Do the reweighting here.
        assert(stSortedSet_search(sortedAlignment, alignedPair2) == NULL);
        assert(stSortedSet_search(sortedAlignment, alignedPair2->reverse) == NULL);
        stSortedSet_insert(sortedAlignment, alignedPair2);
        stSortedSet_insert(sortedAlignment, alignedPair2->reverse);
        stIntTuple_destruct(alignedPair);
    }

    //Cleanup
    stList_destruct(seqFrags);
    stList_destruct(sequences);
    free(pairwiseAlignmentsPerSequenceNonCommonEnds);
    free(pairwiseAlignmentsPerSequenceCommonEnds);
    free(scoreAdjustmentsNonCommonEnds);
    free(scoreAdjustmentsCommonEnds);
    multipleAlignment_destruct(mA);
    stHash_destruct(endInstanceNumbers);

    return sortedAlignment;
}
Segment *getAdjacentCapsSegment(Cap *cap) {
    cap = getTerminalCap(cap);
    cap = cap_getAdjacency(cap);
    assert(cap != NULL);
    return getCapsSegment(cap);
}
int mapGene(Cap *cap, int level, int exon, struct bed *gene, FILE *fileHandle){
   /*
    *Following cactus adjacencies, starting from 'cap', find regions that overlap with 
    *exons of input gene. Report chain relations of these regions with the exons.
    *cap: current cap. Level = chain level. exon = exon number. gene = bed record of gene
    */
   int64_t exonStart, exonEnd;
   if(isStubCap(cap)){
      Group *group = end_getGroup(cap_getEnd(cap));
      Flower *nestedFlower = group_getNestedFlower(group);
      if(nestedFlower != NULL){//recursive call
         Cap *childCap = flower_getCap(nestedFlower, cap_getName(cap));
         assert(childCap != NULL);
         exon = mapGene(childCap, level + 1, exon, gene, fileHandle);
         exonStart = gene->chromStarts->list[exon] + gene->chromStart;
         exonEnd = exonStart + gene->blockSizes->list[exon];
      }
   }

   cap = cap_getAdjacency(cap);
   Cap *nextcap;
   int64_t capCoor;
   exonStart = gene->chromStarts->list[exon] + gene->chromStart;
   exonEnd = exonStart + gene->blockSizes->list[exon];
   Block *block = end_getBlock(cap_getEnd(cap));  
 
   if(block == NULL){
      moveCapToNextBlock(&cap);
   }
   while(!isStubCap(cap) && exon < gene->blockCount){
      End *cend = cap_getEnd(cap);
      capCoor = cap_getCoordinate(cap);//Cap coordinate is always the coordinate on + strand
      nextcap = cap_getAdjacency(cap_getOtherSegmentCap(cap));
      st_logInfo("capCoor: %d, nextCap: %d, eStart: %d, eEnd: %d. Exon: %d\n", 
                  capCoor, cap_getCoordinate(nextcap), exonStart, exonEnd, exon);

      //keep moving if nextBlock Start is still upstream of current exon
      if(cap_getCoordinate(nextcap) <= exonStart){
         moveCapToNextBlock(&cap);
         st_logInfo("Still upstream, nextcap <= exonStart. Move to next chainBlock\n");
      }else if(capCoor >= exonEnd){//Done with current exon, move to next
         st_logInfo("Done with current exon, move to next one\n\n");
         fprintf(fileHandle, "\t\t</exon>\n");//end previous exon
         exon++;
         if(exon < gene->blockCount){
            exonStart = gene->chromStarts->list[exon] + gene->chromStart;
            exonEnd = exonStart + gene->blockSizes->list[exon];
            fprintf(fileHandle, "\t\t<exon id=\"%d\" start=\"%" PRIi64 "\" end=\"%" PRIi64 "\">\n", exon, exonStart, exonEnd);
         }
      }else{//current exon overlaps with current block Or with lower level flower
         Cap *oppcap = cap_getOtherSegmentCap(cap);
         st_logInfo("Current exon overlaps with current block or with lower flower\n");
         if(cap_getCoordinate(oppcap) >= exonStart && exonEnd > capCoor){
            mapBlockToExon(cap, level, fileHandle);
            if(exonEnd <= cap_getCoordinate(oppcap) + 1){
               st_logInfo("Done with current exon, move to next one\n\n");
               fprintf(fileHandle, "\t\t</exon>\n");//end previous exon
               exon++;
	       if(exon < gene->blockCount){
		  exonStart = gene->chromStarts->list[exon] + gene->chromStart;
		  exonEnd = exonStart + gene->blockSizes->list[exon];
		  fprintf(fileHandle, "\t\t<exon id=\"%d\" start=\"%" PRIi64 "\" end=\"%" PRIi64 "\">\n", exon, exonStart, exonEnd);
	       }
               continue;
            }
         }
         //Traverse lower level flowers if exists
         Group *group = end_getGroup(end_getOtherBlockEnd(cend));
         Flower *nestedFlower = group_getNestedFlower(group);
         if(nestedFlower != NULL){//recursive call
            Cap *childCap = flower_getCap(nestedFlower, cap_getName(cap_getOtherSegmentCap(cap)));
            assert(childCap != NULL);
            exon = mapGene(childCap, level + 1, exon, gene, fileHandle);
            exonStart = gene->chromStarts->list[exon] + gene->chromStart;
            exonEnd = exonStart + gene->blockSizes->list[exon];
         }
         moveCapToNextBlock(&cap);
      }
   }
   return exon;
}
Beispiel #23
0
int main(int argc, char *argv[]) {
    st_setLogLevelFromString(argv[1]);
    st_logDebug("Set up logging\n");

    stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(argv[2]);
    CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0);
    stKVDatabaseConf_destruct(kvDatabaseConf);
    st_logDebug("Set up the flower disk\n");

    Name flowerName = cactusMisc_stringToName(argv[3]);
    Flower *flower = cactusDisk_getFlower(cactusDisk, flowerName);

    int64_t totalBases = flower_getTotalBaseLength(flower);
    int64_t totalEnds = flower_getEndNumber(flower);
    int64_t totalFreeEnds = flower_getFreeStubEndNumber(flower);
    int64_t totalAttachedEnds = flower_getAttachedStubEndNumber(flower);
    int64_t totalCaps = flower_getCapNumber(flower);
    int64_t totalBlocks = flower_getBlockNumber(flower);
    int64_t totalGroups = flower_getGroupNumber(flower);
    int64_t totalChains = flower_getChainNumber(flower);
    int64_t totalLinkGroups = 0;
    int64_t maxEndDegree = 0;
    int64_t maxAdjacencyLength = 0;
    int64_t totalEdges = 0;

    Flower_EndIterator *endIt = flower_getEndIterator(flower);
    End *end;
    while((end = flower_getNextEnd(endIt)) != NULL) {
        assert(end_getOrientation(end));
        if(end_getInstanceNumber(end) > maxEndDegree) {
            maxEndDegree = end_getInstanceNumber(end);
        }
        stSortedSet *ends = stSortedSet_construct();
        End_InstanceIterator *capIt = end_getInstanceIterator(end);
        Cap *cap;
        while((cap = end_getNext(capIt)) != NULL) {
            if(cap_getSequence(cap) != NULL) {
                Cap *adjacentCap = cap_getAdjacency(cap);
                assert(adjacentCap != NULL);
                End *adjacentEnd = end_getPositiveOrientation(cap_getEnd(adjacentCap));
                stSortedSet_insert(ends, adjacentEnd);
                int64_t adjacencyLength = cap_getCoordinate(cap) - cap_getCoordinate(adjacentCap);
                if(adjacencyLength < 0) {
                    adjacencyLength *= -1;
                }
                assert(adjacencyLength >= 1);
                if(adjacencyLength >= maxAdjacencyLength) {
                    maxAdjacencyLength = adjacencyLength;
                }
            }
        }
        end_destructInstanceIterator(capIt);
        totalEdges += stSortedSet_size(ends);
        if(stSortedSet_search(ends, end) != NULL) { //This ensures we count self edges twice, so that the division works.
            totalEdges += 1;
        }
        stSortedSet_destruct(ends);
    }
    assert(totalEdges % 2 == 0);
    flower_destructEndIterator(endIt);

    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    Group *group;
    while((group = flower_getNextGroup(groupIt)) != NULL) {
        if(group_getLink(group) != NULL) {
            totalLinkGroups++;
        }
    }
    flower_destructGroupIterator(groupIt);

    printf("flower name: %" PRIi64 " total bases: %" PRIi64 " total-ends: %" PRIi64 " total-caps: %" PRIi64 " max-end-degree: %" PRIi64 " max-adjacency-length: %" PRIi64 " total-blocks: %" PRIi64 " total-groups: %" PRIi64 " total-edges: %" PRIi64 " total-free-ends: %" PRIi64 " total-attached-ends: %" PRIi64 " total-chains: %" PRIi64 " total-link groups: %" PRIi64 "\n",
            flower_getName(flower), totalBases, totalEnds, totalCaps, maxEndDegree, maxAdjacencyLength, totalBlocks, totalGroups, totalEdges/2, totalFreeEnds, totalAttachedEnds, totalChains, totalLinkGroups);

    return 0;
}