Пример #1
0
static void debugScaffoldPathsP(Cap *cap, stList *haplotypePath,
        stHash *haplotypePathToScaffoldPathHash, stHash *haplotypeToMaximalHaplotypeLengthHash,
        stHash *segmentToMaximalHaplotypePathHash, stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters, bool capDir) {
    int64_t insertLength;
    int64_t deleteLength;
    Cap *otherCap;
    enum CapCode capCode = getCapCode(cap, &otherCap, haplotypeEventStrings, contaminationEventStrings, &insertLength, &deleteLength, capCodeParameters);
    if (capCode == SCAFFOLD_GAP || capCode == AMBIGUITY_GAP) {
        Segment *adjacentSegment = getAdjacentCapsSegment(cap);
        assert(adjacentSegment != NULL);
        while (!hasCapInEvents(cap_getEnd(capDir ? segment_get5Cap(adjacentSegment) : segment_get3Cap(adjacentSegment)), haplotypeEventStrings)) {
            adjacentSegment = getAdjacentCapsSegment(capDir ? segment_get5Cap(adjacentSegment) : segment_get3Cap(adjacentSegment));
            assert(adjacentSegment != NULL);
        }
        assert(adjacentSegment != NULL);
        assert(hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get5Cap(adjacentSegment))));
        stIntTuple *j = stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath);
        (void)j;
        assert(j != NULL);
        stList *adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, adjacentSegment);
        if (adjacentHaplotypePath == NULL) {
            adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash,
                    segment_getReverse(adjacentSegment));
        }
        assert(adjacentHaplotypePath != NULL);
        assert(adjacentHaplotypePath != haplotypePath);
        stIntTuple *k = stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath);
        (void)k;
        assert(k != NULL);
        assert(stIntTuple_get(j, 0) == stIntTuple_get(k, 0));
        assert(stHash_search(haplotypePathToScaffoldPathHash, haplotypePath) ==
                stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath));
    }
}
Пример #2
0
void testCap_getSegment(CuTest* testCase) {
    cactusCapTestSetup();
    Block *block = block_construct(2, flower);
    Segment *segment = segment_construct(block, rootEvent);
    CuAssertTrue(testCase, cap_getSegment(segment_get5Cap(segment)) == segment);
    CuAssertTrue(testCase, cap_getSegment(segment_get3Cap(segment)) == segment);
    CuAssertTrue(testCase, cap_getOrientation(segment_get5Cap(segment)) == segment_getOrientation(segment));
    CuAssertTrue(testCase, cap_getOrientation(segment_get3Cap(segment)) == segment_getOrientation(segment));
    CuAssertTrue(testCase, cap_getSegment(cap_getReverse(segment_get5Cap(segment))) == segment_getReverse(segment));
    CuAssertTrue(testCase, cap_getSegment(cap_getReverse(segment_get3Cap(segment))) == segment_getReverse(segment));
    cactusCapTestTeardown();
}
Пример #3
0
void testCap_segmentCoordinatesReverseStrand(CuTest* testCase) {
    /*
     * Tests the coordinates of an segment and its 5 and 3 prime caps.
     */
    cactusCapTestSetup();

    Block *block = block_construct(3, flower);
    Segment *segment = segment_construct2(block, 2, 0, sequence);
    Cap *_5Cap = segment_get5Cap(segment);
    Cap *_3Cap = segment_get3Cap(segment);

    CuAssertTrue(testCase, cap_getSide(_5Cap));
    CuAssertTrue(testCase, !cap_getSide(_3Cap));
    CuAssertTrue(testCase, !cap_getStrand(_5Cap));
    CuAssertTrue(testCase, !cap_getStrand(_3Cap));
    CuAssertIntEquals(testCase, 4, cap_getCoordinate(_5Cap));
    CuAssertIntEquals(testCase, 2, cap_getCoordinate(_3Cap));
    CuAssertTrue(testCase, !segment_getStrand(segment));
    CuAssertIntEquals(testCase, 4, segment_getStart(segment));
    CuAssertIntEquals(testCase, 3, segment_getLength(segment));

    CuAssertTrue(testCase, !cap_getSide(cap_getReverse(_5Cap)));
    CuAssertTrue(testCase, cap_getSide(cap_getReverse(_3Cap)));
    CuAssertTrue(testCase, cap_getStrand(cap_getReverse(_5Cap)));
    CuAssertTrue(testCase, cap_getStrand(cap_getReverse(_3Cap)));
    CuAssertIntEquals(testCase, 4, cap_getCoordinate(cap_getReverse(_5Cap)));
    CuAssertIntEquals(testCase, 2, cap_getCoordinate(cap_getReverse(_3Cap)));
    CuAssertTrue(testCase, segment_getStrand(segment_getReverse(segment)));
    CuAssertIntEquals(testCase, 2, segment_getStart(segment_getReverse(segment)));
    CuAssertIntEquals(testCase, 3, segment_getLength(segment_getReverse(segment)));

    cactusCapTestTeardown();
}
Пример #4
0
void block_setRootInstance(Block *block, Segment *segment) {
	block = block_getPositiveOrientation(block);
	segment = segment_getPositiveOrientation(segment);
	assert(block_getInstance(block, segment_getName(segment)) == segment);
	end_setRootInstance(block_get5End(block), segment_get5Cap(segment));
	end_setRootInstance(block_get3End(block), segment_get3Cap(segment));
}
Пример #5
0
static void getMaximalHaplotypePathsP2(Segment *segment,
        stList *maximalHaplotypePath, stSortedSet *segmentSet, stList *eventStrings) {
    /*
     * Iterate all the way to one end of the contig then start the traversal to define the maximal
     * haplotype path.
     */
    Cap *_5Cap = segment_get5Cap(segment);
    assert(hasCapInEvents(cap_getEnd(segment_get3Cap(segment)), eventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get3Cap(segment))));
    if (trueAdjacency(_5Cap, eventStrings)) { //Check that the adjacency is supported by a haplotype path
        Segment *otherSegment = getAdjacentCapsSegment(_5Cap);
        assert(segment != otherSegment);
        assert(segment_getReverse(segment) != otherSegment);
        if (otherSegment != NULL) {
            assert(stSortedSet_search(segmentSet, otherSegment) == NULL);
            assert(stSortedSet_search(segmentSet, segment_getReverse(
                    otherSegment)) == NULL);
            assert(hasCapInEvents(cap_getEnd(segment_get3Cap(otherSegment)), eventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get3Cap(otherSegment))));
            getMaximalHaplotypePathsP2(otherSegment, maximalHaplotypePath,
                    segmentSet, eventStrings);
        } else { //We need to start the maximal haplotype recursion
            getMaximalHaplotypePathsP3(segment, maximalHaplotypePath,
                    segmentSet, eventStrings);
        }
    } else {
        getMaximalHaplotypePathsP3(segment, maximalHaplotypePath, segmentSet, eventStrings);
    }
}
void printPositions(stList *positions, const char *substitutionType, FILE *fileHandle) {
    for (int64_t i = 0; i < stList_length(positions); i++) {
        SegmentHolder *segmentHolder = stList_get(positions, i);
        int64_t j = segment_getStart(segmentHolder->segment);
        if (segment_getStrand(segmentHolder->segment)) {
            j += segmentHolder->offset;
            assert(
                    cap_getCoordinate(segment_get5Cap(segmentHolder->segment)) == segment_getStart(
                            segmentHolder->segment));
            assert(
                    segment_getStart(segmentHolder->segment) + segment_getLength(segmentHolder->segment) - 1
                            == cap_getCoordinate(segment_get3Cap(segmentHolder->segment)));
        } else {
            j -= segmentHolder->offset;
            assert(
                    segment_getStart(segmentHolder->segment) - segment_getLength(segmentHolder->segment) + 1
                            == cap_getCoordinate(segment_get3Cap(segmentHolder->segment)));
        }

        fprintf(fileHandle, "%s: %s_%" PRIi64 " %" PRIi64 " %c %c %c\n", substitutionType,
                event_getHeader(segment_getEvent(segmentHolder->segment)),
                sequence_getLength(segment_getSequence(segmentHolder->segment)), j,
                segmentHolder->base1, segmentHolder->base2, segmentHolder->base3);
        getMAFBlock(segment_getBlock(segmentHolder->segment), fileHandle);
    }
}
Пример #7
0
static void debugScaffoldPaths(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash,
        stHash *haplotypeToMaximalHaplotypeLengthHash, stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) {
    stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths);
    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stList *haplotypePath = stList_get(haplotypePaths, i);
        assert(stList_length(haplotypePath) > 0);
        //Traversing from 5' end..
        Segment *_5Segment = stList_get(haplotypePath, 0);
        Segment *_3Segment = stList_get(haplotypePath, stList_length(haplotypePath) - 1);
        assert(segment_getStrand(_5Segment) == segment_getStrand(_3Segment));
        if (!segment_getStrand(_5Segment)) {
            Segment *j = _5Segment;
            _5Segment = segment_getReverse(_3Segment);
            _3Segment = segment_getReverse(j);
        }
        assert(segment_getStrand(_5Segment));
        assert(segment_getStrand(_3Segment));
        Cap *_5Cap = segment_get5Cap(_5Segment);
        Cap *_3Cap = segment_get3Cap(_3Segment);
        if (getAdjacentCapsSegment(_5Cap) != NULL) {
            assert(!trueAdjacency(_5Cap, haplotypeEventStrings));
        }
        if (getAdjacentCapsSegment(_3Cap) != NULL) {
            assert(!trueAdjacency(_3Cap, haplotypeEventStrings));
        }
        debugScaffoldPathsP(_5Cap, haplotypePath,
                haplotypePathToScaffoldPathHash, haplotypeToMaximalHaplotypeLengthHash,
                segmentToMaximalHaplotypePathHash, haplotypeEventStrings, contaminationEventStrings, capCodeParameters, 1);
        debugScaffoldPathsP(_3Cap, haplotypePath,
                haplotypePathToScaffoldPathHash, haplotypeToMaximalHaplotypeLengthHash,
                segmentToMaximalHaplotypePathHash, haplotypeEventStrings, contaminationEventStrings, capCodeParameters, 0);
    }
    stHash_destruct(segmentToMaximalHaplotypePathHash);
}
Пример #8
0
static void getMaximalHaplotypePathsCheck(Flower *flower,
        stSortedSet *segmentSet, const char *eventString, stList *eventStrings) {
    /*
     * Do debug checks that the haplotypes paths are well formed.
     */
    Flower_SegmentIterator *segmentIt = flower_getSegmentIterator(flower);
    Segment *segment;
    while ((segment = flower_getNextSegment(segmentIt)) != NULL) {
        if (strcmp(event_getHeader(segment_getEvent(segment)), eventString) == 0) {
            if (hasCapInEvents(cap_getEnd(segment_get5Cap(segment)), eventStrings)) { //isHaplotypeEnd(cap_getEnd(segment_get5Cap(segment)))) {
                assert(stSortedSet_search(segmentSet, segment) != NULL
                        || stSortedSet_search(segmentSet, segment_getReverse(
                                segment)) != NULL);
            }
        }
    }
    flower_destructSegmentIterator(segmentIt);

    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    Group *group;
    while ((group = flower_getNextGroup(groupIt)) != NULL) {
        if (group_getNestedFlower(group) != NULL) {
            getMaximalHaplotypePathsCheck(group_getNestedFlower(group),
                    segmentSet, eventString, eventStrings);
        }
    }
    flower_destructGroupIterator(groupIt);
}
bool getCapGetAtEndOfPath(Cap *cap, Cap **pathEndCap,
        int64_t *pathLength, int64_t *nCount, stList *haplotypeEventStrings, stList *contaminationEventStrings) {
    //Account for length of adjacency
    *pathLength += getTerminalAdjacencyLength(cap);
    *nCount += getNumberOfNsInAdjacency(cap);

    Segment *segment = getAdjacentCapsSegment(cap);
    if (segment == NULL) {
        *pathEndCap = cap_getAdjacency(getTerminalCap(cap));
        assert(*pathEndCap != NULL);
        return 0;
    }
    Cap *adjacentCap = cap_getSide(cap) ? segment_get3Cap(segment)
    : segment_get5Cap(segment);
    assert(
            cap_getName(adjacentCap) == cap_getName(
                    cap_getAdjacency(getTerminalCap(cap))));

    End *adjacentEnd = cap_getEnd(adjacentCap);
    if (hasCapInEvents(adjacentEnd, contaminationEventStrings) || hasCapInEvents(adjacentEnd, haplotypeEventStrings)) { //hasCapNotInEvent(adjacentEnd, event_getHeader(cap_getEvent(cap)))) { //isContaminationEnd(adjacentEnd) || isHaplotypeEnd(adjacentEnd)) {
        *pathEndCap = adjacentCap;
        return 1;
    }
    *pathLength += segment_getLength(segment);
    *nCount += getNumberOfNsInSegment(segment);
    return getCapGetAtEndOfPath(cap_getOtherSegmentCap(adjacentCap),
            pathEndCap, pathLength, nCount, haplotypeEventStrings, contaminationEventStrings);
}
Пример #10
0
void testBlock_getSetRootInstance(CuTest *testCase) {
    cactusBlockTestSetup();
    Block *block2 = block_construct(1, flower);
    CuAssertTrue(testCase, block_getRootInstance(block2) == NULL);
    block_destruct(block2);
    //block_setRootInstance(block, segment_getReverse(rootSegment)); //set in the constructor code of the test.
    CuAssertTrue(testCase, block_getRootInstance(block) == segment_getReverse(rootSegment));
    CuAssertTrue(testCase, end_getRootInstance(block_get5End(block)) == segment_get5Cap(segment_getReverse(rootSegment)));
    CuAssertTrue(testCase, end_getRootInstance(block_get3End(block)) == segment_get3Cap(segment_getReverse(rootSegment)));
    cactusBlockTestTeardown();
}
Пример #11
0
bool linked(Segment *segmentX, Segment *segmentY, int64_t difference,
        const char *eventString, bool *aligned) {
    assert(segment_getStrand(segmentX));
    assert(segment_getStrand(segmentY));
    *aligned = 0;
    if (segment_getStart(segmentX) < segment_getStart(segmentY)) {
        Block *blockX = segment_getBlock(segmentX);
        Block *blockY = segment_getBlock(segmentY);
        Block_InstanceIterator *instanceItX = block_getInstanceIterator(blockX);
        Segment *segmentX2;
        while ((segmentX2 = block_getNext(instanceItX)) != NULL) {
            if (strcmp(event_getHeader(segment_getEvent(segmentX2)),
                    eventString) == 0) {
                Block_InstanceIterator *instanceItY =
                        block_getInstanceIterator(blockY);
                Segment *segmentY2;
                while ((segmentY2 = block_getNext(instanceItY)) != NULL) {
                    if (strcmp(event_getHeader(segment_getEvent(segmentY2)),
                            eventString) == 0) {
                        *aligned = 1;
                        if (sequence_getMetaSequence(
                                segment_getSequence(segmentX2))
                                == sequence_getMetaSequence(
                                        segment_getSequence(segmentY2))) { //Have the same assembly sequence
                            //Now check if the two segments are connected by a path of adjacency from the 3' end of segmentX to the 5' end of segmentY.
                            int64_t separationDistance;
                            if (capsAreAdjacent(segment_get3Cap(segmentX2),
                                    segment_get5Cap(segmentY2),
                                    &separationDistance)) {
                                //if(difference < 10000 || (separationDistance <=  difference * 1.5 && difference <= separationDistance * 1.5)) {
                                block_destructInstanceIterator(instanceItX);
                                block_destructInstanceIterator(instanceItY);
                                return 1;
                                //}
                            }
                        }
                    }
                }
                block_destructInstanceIterator(instanceItY);
            }
        }
        block_destructInstanceIterator(instanceItX);
    } else {
        assert(segmentX == segmentY);
        if(hasCapInEvent(block_get5End(segment_getBlock(segmentX)),
                eventString)) {
            *aligned = 1;
            return 1;
        }
    }
    return 0;
}
Пример #12
0
Segment *block_splitP(Segment *segment,
		Block *leftBlock, Block *rightBlock) {
	Segment *leftSegment = segment_getSequence(segment) != NULL
				? segment_construct2(leftBlock, segment_getStart(segment), segment_getStrand(segment), segment_getSequence(segment))
				: segment_construct(leftBlock, segment_getEvent(segment));
				Segment *rightSegment = segment_getSequence(segment) != NULL
				? segment_construct2(rightBlock, segment_getStart(segment) + block_getLength(leftBlock), segment_getStrand(segment), segment_getSequence(segment))
						: segment_construct(rightBlock, segment_getEvent(segment));
	//link together.
	cap_makeAdjacent(segment_get3Cap(leftSegment), segment_get5Cap(rightSegment));
	//update adjacencies.
	Cap *_5Cap = segment_get5Cap(segment);
	Cap *new5Cap = segment_get5Cap(leftSegment);
	Cap *_3Cap = segment_get3Cap(segment);
	Cap *new3Cap = segment_get3Cap(rightSegment);
	if(cap_getAdjacency(_5Cap) != NULL) {
		cap_makeAdjacent(cap_getAdjacency(_5Cap), new5Cap);
	}
	if(cap_getAdjacency(_3Cap) != NULL) {
		cap_makeAdjacent(cap_getAdjacency(_3Cap), new3Cap);
	}
	return leftSegment;
}
Пример #13
0
void testCap_getOtherSegmentCap(CuTest *testCase) {
    cactusCapTestSetup();

    Block *block = block_construct(3, flower);
    Segment *segment = segment_construct2(block, 2, 1, sequence);
    Cap *_5Cap = segment_get5Cap(segment);
    Cap *_3Cap = segment_get3Cap(segment);

    CuAssertTrue(testCase, cap_getOtherSegmentCap(leaf1Cap) == NULL);
    CuAssertTrue(testCase, cap_getOtherSegmentCap(cap_getReverse(leaf1Cap)) == NULL);

    CuAssertTrue(testCase, cap_getOtherSegmentCap(_5Cap) == _3Cap);
    CuAssertTrue(testCase, cap_getOtherSegmentCap(_3Cap) == _5Cap);

    CuAssertTrue(testCase, cap_getOtherSegmentCap(cap_getReverse(_5Cap)) == cap_getReverse(_3Cap));
    CuAssertTrue(testCase, cap_getOtherSegmentCap(cap_getReverse(_3Cap)) == cap_getReverse(_5Cap));

    cactusCapTestTeardown();
}
static int64_t getBoundingNs(Cap *cap) {
    assert(cap != NULL);
    Segment *segment = getCapsSegment(cap);
    if (segment == NULL) {
        return 0;
    }
    Cap *_5TerminalCap = getTerminalCap(segment_get5Cap(segment));
    Cap *_3TerminalCap = getTerminalCap(segment_get3Cap(segment));
    (void)_3TerminalCap;
    assert(_5TerminalCap != NULL);
    assert(_3TerminalCap != NULL);
    //return 0;
    if (cap_getName(_5TerminalCap) == cap_getName(cap)) {
        return getBoundingNsP(segment);
    } else {
        assert(cap_getName(_3TerminalCap) == cap_getName(cap));
        return getBoundingNsP(segment_getReverse(segment));
    }
}
Пример #15
0
static void getMaximalHaplotypePathsP(Flower *flower,
        stList *maximalHaplotypePaths, stSortedSet *segmentSet,
        const char *eventString,
        stList *eventStrings) {
    /*
     *  Iterate through the segments in this flower.
     */
    Flower_SegmentIterator *segmentIt = flower_getSegmentIterator(flower);
    Segment *segment;
    while ((segment = flower_getNextSegment(segmentIt)) != NULL) {
        if (stSortedSet_search(segmentSet, segment) == NULL
                && stSortedSet_search(segmentSet, segment_getReverse(segment))
                        == NULL) { //Check we haven't yet seen this segment
            if (strcmp(event_getHeader(segment_getEvent(segment)), eventString)
                    == 0) { //Check if the segment is in the assembly
                if (hasCapInEvents(cap_getEnd(segment_get5Cap(segment)), eventStrings)) { //Is a block in a haplotype segment
                    assert(hasCapInEvents(cap_getEnd(segment_get3Cap(segment)), eventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get3Cap(segment))));
                    stList *maximalHaplotypePath = stList_construct();
                    stList_append(maximalHaplotypePaths, maximalHaplotypePath);
                    getMaximalHaplotypePathsP2(segment, maximalHaplotypePath,
                            segmentSet, eventStrings);
                } else {
                    assert(!hasCapInEvents(cap_getEnd(segment_get3Cap(segment)), eventStrings));//assert(!isHaplotypeEnd(cap_getEnd(segment_get3Cap(segment))));
                }
            }
        }
    }
    flower_destructSegmentIterator(segmentIt);
    /*
     * Now recurse on the contained flowers.
     */
    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    Group *group;
    while ((group = flower_getNextGroup(groupIt)) != NULL) {
        if (group_getNestedFlower(group) != NULL) {
            getMaximalHaplotypePathsP(group_getNestedFlower(group),
                    maximalHaplotypePaths, segmentSet, eventString, eventStrings);
        }
    }
    flower_destructGroupIterator(groupIt);
}
Пример #16
0
int64_t flower_getTotalBaseLength(Flower *flower) {
    /*
     * The implementation of this function is very like that in group_getTotalBaseLength, with a few differences. Consider merging them.
     */
    Flower_EndIterator *endIterator = flower_getEndIterator(flower);
    End *end;
    int64_t totalLength = 0;
    while ((end = flower_getNextEnd(endIterator)) != NULL) {
        if (!end_isBlockEnd(end)) {
            End_InstanceIterator *instanceIterator = end_getInstanceIterator(end);
            Cap *cap;
            while ((cap = end_getNext(instanceIterator)) != NULL) {
                cap = cap_getStrand(cap) ? cap : cap_getReverse(cap);
                if (!cap_getSide(cap) && cap_getSequence(cap) != NULL) {
                    Cap *cap2 = cap_getAdjacency(cap);
                    assert(cap2 != NULL);
                    while (end_isBlockEnd(cap_getEnd(cap2))) {
                        Segment *segment = cap_getSegment(cap2);
                        assert(segment != NULL);
                        assert(segment_get5Cap(segment) == cap2);
                        cap2 = cap_getAdjacency(segment_get3Cap(segment));
                        assert(cap2 != NULL);
                        assert(cap_getStrand(cap2));
                        assert(cap_getSide(cap2));
                    }
                    assert(cap_getStrand(cap2));
                    assert(cap_getSide(cap2));
                    int64_t length = cap_getCoordinate(cap2) - cap_getCoordinate(cap) - 1;
                    assert(length >= 0);
                    totalLength += length;
                }
            }
            end_destructInstanceIterator(instanceIterator);
        }
    }
    flower_destructEndIterator(endIterator);
    return totalLength;
}
Пример #17
0
stList *getContigPaths(Flower *flower, const char *eventString, stList *eventStrings) {
    stList *maximalHaplotypePaths = stList_construct3(0,
            (void(*)(void *)) stList_destruct);
    stSortedSet *segmentSet = stSortedSet_construct();
    getMaximalHaplotypePathsP(flower, maximalHaplotypePaths, segmentSet, eventString, eventStrings);

    //Do some debug checks..
    st_logDebug("We have %" PRIi64 " maximal haplotype paths\n", stList_length(
            maximalHaplotypePaths));
    getMaximalHaplotypePathsCheck(flower, segmentSet, eventString, eventStrings);
    for (int64_t i = 0; i < stList_length(maximalHaplotypePaths); i++) {
        stList *maximalHaplotypePath = stList_get(maximalHaplotypePaths, i);
        st_logDebug("We have a maximal haplotype path with length %" PRIi64 "\n",
                stList_length(maximalHaplotypePath));
        assert(stList_length(maximalHaplotypePath) > 0);
        Segment *_5Segment = stList_get(maximalHaplotypePath, 0);
        Segment *_3Segment = stList_get(maximalHaplotypePath, stList_length(
                maximalHaplotypePath) - 1);
        if (getAdjacentCapsSegment(segment_get5Cap(_5Segment)) != NULL) {
            assert(!trueAdjacency(segment_get5Cap(_5Segment), eventStrings));
        }
        if (getAdjacentCapsSegment(segment_get3Cap(_3Segment)) != NULL) {
            assert(!trueAdjacency(segment_get3Cap(_3Segment), eventStrings));
        }
        for (int64_t j = 0; j < stList_length(maximalHaplotypePath) - 1; j++) {
            _5Segment = stList_get(maximalHaplotypePath, j);
            _3Segment = stList_get(maximalHaplotypePath, j + 1);
            assert(trueAdjacency(segment_get3Cap(_5Segment), eventStrings));
            assert(trueAdjacency(segment_get5Cap(_3Segment), eventStrings));
            assert(cap_getAdjacency(getTerminalCap(segment_get3Cap(_5Segment)))
                    == getTerminalCap(segment_get5Cap(_3Segment)));
            assert(strcmp(event_getHeader(segment_getEvent(_5Segment)),
                   eventString) == 0);
            assert(strcmp(event_getHeader(segment_getEvent(_3Segment)),
                    eventString) == 0);
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(_5Segment)), eventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get5Cap(_5Segment))));
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(_3Segment)), eventStrings)); //isHaplotypeEnd(cap_getEnd(segment_get5Cap(_3Segment))));
        }
    }

    stSortedSet_destruct(segmentSet);

    return maximalHaplotypePaths;
}
Пример #18
0
static stHash *getScaffoldPathsP(stList *haplotypePaths, stHash *haplotypePathToScaffoldPathHash,
        stList *haplotypeEventStrings, stList *contaminationEventStrings, CapCodeParameters *capCodeParameters) {
    stHash *haplotypeToMaximalHaplotypeLengthHash = buildContigPathToContigPathLengthHash(haplotypePaths);
    stHash *segmentToMaximalHaplotypePathHash = buildSegmentToContigPathHash(haplotypePaths);
    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stSortedSet *bucket = stSortedSet_construct();
        stHash_insert(haplotypePathToScaffoldPathHash, stList_get(haplotypePaths, i), bucket);
        stSortedSet_insert(bucket, stList_get(haplotypePaths, i));
    }
    for (int64_t i = 0; i < stList_length(haplotypePaths); i++) {
        stList *haplotypePath = stList_get(haplotypePaths, i);
        assert(stList_length(haplotypePath) > 0);
        Segment *_5Segment = stList_get(haplotypePath, 0);
        if (!segment_getStrand(_5Segment)) {
            _5Segment = segment_getReverse(stList_get(haplotypePath, stList_length(haplotypePath) - 1));
        }
        assert(segment_getStrand(_5Segment));
        if (getAdjacentCapsSegment(segment_get5Cap(_5Segment)) != NULL) {
            assert(!trueAdjacency(segment_get5Cap(_5Segment), haplotypeEventStrings));
        }
        int64_t insertLength;
        int64_t deleteLength;
        Cap *otherCap;
        enum CapCode _5CapCode = getCapCode(segment_get5Cap(_5Segment), &otherCap, haplotypeEventStrings, contaminationEventStrings, &insertLength, &deleteLength, capCodeParameters);
        if (_5CapCode == SCAFFOLD_GAP || _5CapCode == AMBIGUITY_GAP) {
            assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath) != NULL);
            int64_t j = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, haplotypePath), 0);
            Segment *adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(_5Segment));
            assert(adjacentSegment != NULL);
            while (!hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)) { //is not a haplotype end
                adjacentSegment = getAdjacentCapsSegment(segment_get5Cap(adjacentSegment));
                assert(adjacentSegment != NULL);
            }
            assert(adjacentSegment != NULL);
            assert(hasCapInEvents(cap_getEnd(segment_get5Cap(adjacentSegment)), haplotypeEventStrings)); //is a haplotype end
            stList *adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, adjacentSegment);
            if (adjacentHaplotypePath == NULL) {
                adjacentHaplotypePath = stHash_search(segmentToMaximalHaplotypePathHash, segment_getReverse(
                        adjacentSegment));
            }
            assert(adjacentHaplotypePath != NULL);
            assert(adjacentHaplotypePath != haplotypePath);
            assert(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath) != NULL);
            int64_t k = stIntTuple_get(stHash_search(haplotypeToMaximalHaplotypeLengthHash, adjacentHaplotypePath), 0);

            //Now merge the buckets and make new int tuples..
            stSortedSet *bucket1 = stHash_search(haplotypePathToScaffoldPathHash, haplotypePath);
            stSortedSet *bucket2 = stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath);
            assert(bucket1 != NULL);
            assert(bucket2 != NULL);
            assert(bucket1 != bucket2);
            stSortedSet *bucket3 = stSortedSet_getUnion(bucket1, bucket2);
            stSortedSetIterator *bucketIt = stSortedSet_getIterator(bucket3);
            stList *l;
            while ((l = stSortedSet_getNext(bucketIt)) != NULL) {
                //Do the bucket first
                assert(stHash_search(haplotypePathToScaffoldPathHash, l) == bucket1 || stHash_search(haplotypePathToScaffoldPathHash, l) == bucket2);
                stHash_remove(haplotypePathToScaffoldPathHash, l);
                stHash_insert(haplotypePathToScaffoldPathHash, l, bucket3);
                //Now the length
                stIntTuple *m = stHash_remove(haplotypeToMaximalHaplotypeLengthHash, l);
                assert(m != NULL);
                assert(stIntTuple_get(m, 0) == j || stIntTuple_get(m, 0) == k);
                stHash_insert(haplotypeToMaximalHaplotypeLengthHash, l, stIntTuple_construct1( j + k));
                stIntTuple_destruct(m);
            }
            assert(stHash_search(haplotypePathToScaffoldPathHash, haplotypePath) == bucket3);
            assert(stHash_search(haplotypePathToScaffoldPathHash, adjacentHaplotypePath) == bucket3);
            stSortedSet_destructIterator(bucketIt);
        }
    }
    stHash_destruct(segmentToMaximalHaplotypePathHash);
    return haplotypeToMaximalHaplotypeLengthHash;
}